Further to this question: PDF::FromHTML - Corrupt file and no output
The code in question is 'working' in that it produces a PDF document just fine, but NONE of the HTML anchors are being translated, and on larger documents the processing stops at page 11 of the PDF - with no error; it closes the document just fine!
Edit: To save you looking at the linked question, here is the relevant code:
# print "<p>".$textblob."</p>";
# Wrap the accumulated HTML fragment in a minimal document skeleton before
# handing it to the converter.
$textblob = join '', '<html><head></head><body>', $textblob, '</body></html>';
# $textblob = decode('UTF-8', $textblob);
my $output;
if (defined($query->param('PDF'))) {
    # Convert the in-memory HTML (passed by reference) and write the PDF
    # into $output, again by reference, rather than to a file on disk.
    my $pdf = PDF::FromHTML->new(encoding => 'utf-8');
    $pdf->load_file(\$textblob);
    $pdf->convert(
        # With PDF::API2, font names such as 'traditional' also works
        Font       => 'Arial',
        LineHeight => 10,
        Landscape  => 0,
    );
    $pdf->write_file(\$output);
    print $output;
}
When I uncomment the line that prints $textblob and comment out the PDF section, the full 400-reference adventure is displayed as HTML, with working links, just fine...
Update: In desperation, here is the entire script (it's not TOO long...):
#!/usr/bin/perl
use cPanelUserConfig;
use CGI::Carp qw(fatalsToBrowser);
use CGI;
use List::Util qw(shuffle);
use PDF::FromHTML;
require "authenticate.pl";
# ---------------------------------------------------------------------------
# Main request dispatcher.
#
# $query is deliberately left as a package global: every subroutine in this
# script reads it directly.
#
# Flow:
#   1. Emit the HTTP header (application/pdf when the "PDF" button was
#      pressed, otherwise an HTML page header).
#   2. If a raw document was posted ("doc"), split it into numbered
#      references and show the editing form.
#   3. Else, if an edited form was posted ("references"), rebuild the
#      reference hash and run the action named by the submit button.
#   4. Else show the empty input form.
# ---------------------------------------------------------------------------
$query = CGI->new;

# Remember the output mode once, so that the header and footer decisions
# below cannot disagree with each other.
my $pdf_requested = defined($query->param('PDF'));

if ($pdf_requested) {
    print $query->header(-type => 'application/pdf');
}
else {
    print $query->header(-charset => 'utf-8');
    html_header();
    print "\n\n\n\n<!-- -------------------------- BEGIN: ff.net Script generated text ------------------------------------------- -->";
    print "Randomise working? Let me know if you find a bug.<br />";
}

if (defined($query->param('doc'))) {
    # The trailing "EOF" marker gives the look-ahead in the regex below
    # something to stop at for the very last reference.
    my $doc = $query->param('doc') . "\nEOF";

    # refhash{key}=content, where key==refnumber and content==well, ref content
    my %refhash = $doc =~ /^[\n\s\t\.\#]*(\d+)[\s\t\.\#\n]+(?!\n*^[\n\s\t\.\#]*\d+[\s\t\.\#\n]+)(.+?)(?=^[\s\t\.\#\n]*\d+[\s\t\.\#\n]+|EOF)/smcgi;
    display_refhash(\%refhash);
}
elsif (defined($query->param('references'))) {
    my %anchors;

    # Fetch the count in scalar context. Calling param() directly inside the
    # argument list evaluates it in list context, where zero or several
    # submitted values would shift \%anchors out of its expected position.
    my $reference_count = $query->param('references');
    my $refhashref      = recreate_refhash($reference_count, \%anchors);

    if (defined($query->param('Randomise'))) {
        $refhashref = randomise($refhashref, \%anchors);
        print "Your adventure looks like this: <br /><br />";
        display_refhash($refhashref);
    }
    elsif (defined($query->param('Save'))) {
        save($refhashref);
    }
    elsif (defined($query->param('Auto-HTML Tag'))) {
        print "Your adventure looks like this: <br /><br />";
        display_refhash($refhashref);
    }
    elsif (defined($query->param('Auto-ABML Tag'))) {
        # NOTE(review): autoABML is not defined in the part of the script
        # shown here - presumably it comes from authenticate.pl; confirm.
        autoABML($refhashref);
        print "Your adventure looks like this: <br /><br />";
        display_refhash($refhashref);
    }
    elsif ($pdf_requested) {
        output_pdf($refhashref);
    }
    else {
        print "undefined function call";
    }
}
else { # output form to input doc content
    print "Please input your document text into the textarea below (copy and paste should do it):";
    print '<form method="post" action="doc_to_refs.cgi" enctype="multipart/form-data" name="doc_to_refs_form">';
    print $query->textarea(-name=>'doc',-rows=>20,-cols=>100, -style=>"font-family:arial;width:98%");
    print $query->submit('Go!');
    print '</form>';
}

# The HTML header is only written for HTML responses, so the footer must be
# skipped for PDF responses too: anything printed after the PDF bytes would
# end up inside the downloaded file.
html_footer() unless $pdf_requested;
# print "<!-- -------------------------- END: ff.net Script generated text ------------------------------------------- -->";
# Rebuild the reference hash from the numbered fields of a submitted form.
#
# Arguments:
#   $references  - number of form rows to inspect (rows 0 .. $references-1)
#   $anchors_ref - hash reference that is filled in place: for every kept
#                  row whose "anchorN" field is set, the field's value is
#                  mapped to the row index N
#
# Returns a reference to a hash mapping each kept row's "referenceN" value
# to its "reftextN" value. Rows whose "deleteN" field is set are skipped
# entirely (no hash entry and no anchor entry).
#
# Reads the form through the global $query object.
sub recreate_refhash {
    my ($references, $anchors_ref) = @_;
    my %rebuilt;

    my $row = 0;
    while ($row < $references) {
        # A ticked "delete" checkbox drops the whole row.
        next if defined($query->param("delete$row"));

        my $ref_number = $query->param("reference$row");
        my $ref_text   = $query->param("reftext$row");
        $rebuilt{$ref_number} = $ref_text;

        my $anchored = $query->param("anchor$row");
        $anchors_ref->{$anchored} = $row if defined($anchored);
    }
    continue {
        $row++;
    }

    return \%rebuilt;
}
# Shuffle which content lives under which reference number, keeping
# "anchored" references in place, and renumber the cross-reference links
# inside the content accordingly.
#
# Arguments:
#   $refhashref - hash reference: reference number => content
#   $anchor_ref - hash reference: anchored reference number => position
#                 (0-based index in the numerically sorted key list) that
#                 the reference must occupy after shuffling
#
# Returns a reference to a new hash with the same keys as the input and the
# contents redistributed. Prints progress/diagnostic text to the currently
# selected output handle along the way.
#
# All working variables are lexical. The original declaration
# `my %a, %b, @c, $d;` only made the first variable lexical and left the
# rest (plus the loop counters) as package globals.
sub randomise {
    my $refhashref = shift;
    my $anchor_ref = shift;
    my %refhash    = %$refhashref;
    my %anchors    = %$anchor_ref;
    my (%randomisedrefhash, %Xrefhash);

    # randomise the list
    # (sorted first so that the shuffle always starts from the same order)
    my @refstack = shuffle sort { $a <=> $b } keys %refhash;

    ## transpose anchors back to their required location
    # NOTE(review): an anchor position beyond the end of @refstack (possible
    # when rows were deleted) makes the reference below extend the array
    # with undef entries - confirm whether that case needs handling.
    for (my $x = 0; $x < @refstack; $x++) {
        if (defined($anchors{$refstack[$x]})) {
            my $anchor = \$refstack[$anchors{$refstack[$x]}];
            my $temp   = $refstack[$x];
            $refstack[$x] = $$anchor;
            print "---Swapping $temp with ".$$anchor;
            $$anchor = $temp;

            # The value swapped into slot $x may itself be anchored
            # somewhere else; if so, revisit this slot on the next pass.
            if (defined($anchors{$refstack[$x]})) {
                if ($refstack[$anchors{$refstack[$x]}] ne $$anchor) {
                    $x--;
                }
            }
        }
    }

    ## randomise the refs and the content associations, and create the cross-ref hash
    foreach my $ref (sort { $a <=> $b } keys %refhash) {
        my $key = shift @refstack;
        $randomisedrefhash{$ref} = $refhash{$key};
        $Xrefhash{$key}          = $ref;    # old number => new number
    }

    ## now do the content link substitutions
    foreach my $ref (keys %randomisedrefhash) {
        $randomisedrefhash{$ref}=~s/(return\sto|go\sto|turn\sto)(\s+)(page|paragraph|reference|section)*(\s)*(\d+)/substitute_xref($1,$2,$3,$4,$5,\%Xrefhash)/egi;
    }

    print "You asked for the following anchors:";
    foreach my $key (keys %anchors) {
        print $anchors{$key};
    }

    return \%randomisedrefhash;
}
# Build the replacement text for one "turn to <n>"-style link after the
# references have been renumbered.
#
# Arguments (the five captures of the link regex, then the mapping):
#   $pretext1 .. $pretext4 - text in front of the number, reproduced as-is;
#                            optional captures may be undef and are treated
#                            as empty strings
#   $link                  - the reference number found in the text
#   $Xrefhashref           - hash reference: old number => new number
#
# Returns the four pretext pieces followed by the new number. When $link is
# not in the mapping (a link to a reference that does not exist), the
# original number is kept; previously it was replaced by nothing, silently
# deleting the number from the text.
sub substitute_xref {
    my ($pretext1, $pretext2, $pretext3, $pretext4, $link, $Xrefhashref) = @_;

    # Look the number up through the reference instead of copying the whole
    # hash on every substitution.
    my $mapped  = $Xrefhashref ? $Xrefhashref->{$link} : undef;
    my $newlink = defined($mapped) ? $mapped : $link;

    my $text = '';
    foreach my $piece ($pretext1, $pretext2, $pretext3, $pretext4, $newlink) {
        $text .= $piece if defined($piece);
    }
    return $text;
}
# Placeholder for the not-yet-implemented "Save" action: ignores its
# arguments and only prints a notice to the currently selected output
# handle. Returns whatever print returns.
sub save {
    return print 'Will Save soon';
}
# Print the reference editing form followed by a plain preview of the text.
#
# Argument:
#   $refhashref - hash reference: reference number => content
#
# For every reference (in numeric order) the form gets a text field for the
# number, "delete" and "anchor" checkboxes and a textarea for the content,
# all named with the row index as suffix. A hidden "references" field holds
# the row count, and the submit buttons select the next action.
#
# Depending on which submit button produced the current request, the preview
# text is additionally tagged:
#   "Auto-HTML Tag" or "PDF" - reference numbers become <a id="N"> targets
#                              and "turn to N" phrases become links to them
#   "Auto-ABML Tag"          - "turn to N" phrases are wrapped in tt markup
#
# Reads the request through the global $query object. The preview buffer is
# lexical: the original `my $ref,$textblob;` left $textblob as a package
# global that kept growing across calls.
sub display_refhash {
    my $refhashref = shift;
    my %refhash    = %$refhashref;

    # The submit button does not change during the request, so test once.
    my $tag_html = defined($query->param('Auto-HTML Tag'))
                || defined($query->param('PDF'));
    my $tag_abml = defined($query->param('Auto-ABML Tag'));

    print '<form method="post" action="doc_to_refs.cgi" enctype="multipart/form-data" name="doc_to_refs_form">';

    my $x        = 0;     # row index, used as the form-field name suffix
    my $textblob = '';    # preview text accumulated for the final <p>
    foreach my $ref (sort { $a <=> $b } keys %refhash) {
        my $reference = "reference"."$x";
        my $reftext   = "reftext"."$x";
        my $anchor    = "anchor"."$x";
        my $delete    = "delete"."$x";
        my $default   = $refhash{$ref};

        print "Reference is: ".$query->textfield(-name=>$reference,-value=>$ref, -override=>1)."<br />";
        print $query->checkbox_group(-name=>$delete,-values=>$ref,-labels=>{$ref=>'Delete Me'})."<br />";
        print $query->checkbox_group(-name=>$anchor,-values=>$ref, -labels=>{$ref=>'Anchor Me (Will NOT get Randomised)'})."<br />";
        print "Content is: ".$query->textarea(-name=>$reftext, -default=>$default, -rows=>5, -override=>1, -cols=>100, -style=>"font-family:arial;width:98%")."<br />";
        print "<br /><br />";

        # The form fields above show the untouched values; only the copies
        # that go into the preview are tagged below.
        if ($tag_html) {
            $ref=~s/(\d+)/\<a id\=\"$1\"\>$1\<\/a\>/gi;
            $default=~s/(return\sto|go\sto|turn\sto)(\s+)(page|paragraph|reference|section)*(\s*)(\d+)/\<a href\=\"\#$5\"\>$1 $2 $3 $4 $5\<\/a\>/gi;
        }
        if ($tag_abml) {
            # $ref=~s/(\d+)/\<a id\=\"$1\"\>$1\<\/a\>/gi;
            # NOTE(review): the "<;" and ">;" in the replacement look like
            # HTML entities that were decoded somewhere along the way -
            # confirm against the original script before relying on it.
            $default=~s/(return\sto|go\sto|turn\sto)(\s+)(page|paragraph|reference|section)*(\s*)(\d+)/\<\;tt ref\=\"$5\"\>\;$1 $2 $3 $4 $5\<\;\/tt\>\;/gi;
        }

        $textblob .= $ref." ".$default."<br /><br />";
        $x++;
    }

    print $query->hidden(-name=>'references',-value=>$x,override=>1);
    # print $query->submit(-name=>'Save');
    print $query->submit('Randomise');
    print $query->submit('Auto-ABML Tag');
    print $query->submit('Auto-HTML Tag');
    print $query->submit('PDF');
    print "</form><br /><br /><br />";

    # Keep the author's line breaks visible in the HTML preview.
    $textblob=~s/\n/\<br \/\>/gi;
    print "<p>".$textblob."</p>";
}
# Render the references as one HTML document and print it as a PDF.
#
# Argument:
#   $refhashref - hash reference: reference number => content
#
# The references are concatenated in numeric order. Depending on which
# submit button produced the current request, the text is tagged first:
#   "Auto-HTML Tag" or "PDF" - reference numbers become <a id="N"> targets
#                              and "turn to N" phrases become links to them
#   "Auto-ABML Tag"          - "turn to N" phrases are wrapped in tt markup
# Newlines are turned into <br /> and the result is wrapped in a minimal
# <html> skeleton. Nothing is printed unless the "PDF" button was pressed.
#
# Reads the request through the global $query object. The HTML buffer is
# lexical: the original `my $ref,$textblob;` left $textblob as a package
# global that kept growing across calls.
#
# NOTE(review): whether PDF::FromHTML turns <a id>/<a href="#..."> into PDF
# links cannot be told from this script - check the module documentation.
sub output_pdf {
    my $refhashref = shift;
    my %refhash    = %$refhashref;

    # The submit button does not change during the request, so test once.
    my $pdf_requested = defined($query->param('PDF'));
    my $tag_html      = defined($query->param('Auto-HTML Tag')) || $pdf_requested;
    my $tag_abml      = defined($query->param('Auto-ABML Tag'));

    my $textblob = '';
    foreach my $ref (sort { $a <=> $b } keys %refhash) {
        my $default = $refhash{$ref};

        if ($tag_html) {
            $ref=~s/(\d+)/\<a id\=\"$1\"\>$1\<\/a\>/gi;
            $default=~s/(return\sto|go\sto|turn\sto)(\s+)(page|paragraph|reference|section)*(\s*)(\d+)/\<a href\=\"\#$5\"\>$1 $2 $3 $4 $5\<\/a\>/gi;
        }
        if ($tag_abml) {
            # $ref=~s/(\d+)/\<a id\=\"$1\"\>$1\<\/a\>/gi;
            # NOTE(review): the "<;" and ">;" in the replacement look like
            # HTML entities that were decoded somewhere along the way -
            # confirm against the original script before relying on it.
            $default=~s/(return\sto|go\sto|turn\sto)(\s+)(page|paragraph|reference|section)*(\s*)(\d+)/\<\;tt ref\=\"$5\"\>\;$1 $2 $3 $4 $5\<\;\/tt\>\;/gi;
        }

        $textblob .= $ref." ".$default."<br /><br />";
    }

    $textblob=~s/\n/\<br \/\>/gi;
    # print "<p>".$textblob."</p>";
    $textblob = '<html><head></head><body>'.$textblob.'</body></html>';

    return unless $pdf_requested;

    # Both the HTML input and the PDF output are passed by reference, so the
    # conversion happens in memory rather than through files on disk.
    my $pdf = PDF::FromHTML->new( encoding => 'utf-8' );
    $pdf->load_file(\$textblob);
    $pdf->convert(
        # With PDF::API2, font names such as 'traditional' also works
        Font       => 'Arial',
        LineHeight => 10,
        Landscape  => 0,
    );

    my $output;
    $pdf->write_file(\$output);

    # A PDF is binary data: switch STDOUT to raw mode so that no newline
    # translation or encoding layer can alter the bytes on the way out.
    binmode STDOUT;
    print $output;
    return;
}
# Hook for printing the HTML page header. Intentionally does nothing in the
# script as shown; returns nothing.
sub html_header {
    return;
}
# Hook for printing the HTML page footer. Intentionally does nothing in the
# script as shown; returns nothing.
sub html_footer {
    return;
}
If you want sample data, let me know and I'll upload it somewhere.