]> www.wagner.pp.ru Git - fiction/Kate-the-Empress.git/blobdiff - Tex2fb2
Typo fixes
[fiction/Kate-the-Empress.git] / Tex2fb2
diff --git a/Tex2fb2 b/Tex2fb2
index 2d644dfdac9b0cc512d5ba3229ee4a0a05bc7c12..abc38bba4320338ddd5275b28ab8f23b90609fff 100755 (executable)
--- a/Tex2fb2
+++ b/Tex2fb2
@@ -1,32 +1,62 @@
 #!/usr/bin/perl -CDS
+
 use utf8;
+use POSIX qw(strftime);
+use MIME::Base64;
 # char-level modes
 my $poetry = 0;
 my $verbatim = 0;
 my @sections;
 my $buffer;
+my $idseq = 0; # sequential number of footnotes
+my $footnotes="";
 #
 # TODO italic paragaphs
 # footnotes
 # epigraphs
 #
 # print fictionbook header
-print "<?xml version=\"1.0\" encoding=\"UTF-8\">\n";
+print "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
 print "<FictionBook xmlns=\"http://www.gribuser.ru/xml/fictionbook/2.0\"
 xmlns:l=\"http://www.w3.org/1999/xlink\">\n";
 my $metadata = shift @ARGV;
 open F,"<",$metadata;
+my $pics = "";
 while (<F>) {
+       # Replace empty date with current date
+       if (/<date\s+\/>/ || /<date>\s*<\/date>/) {
+               $_ = "<date value=\"".strftime("%Y-%m-%d",localtime())."\">".
+                       strftime("%d/%m/%Y",localtime())."</date>\n";
+       }
+       # Add the current time (epoch seconds / 1E10) as a fractional part to the version
+       if (/<version>(\d*)(.\d*)?<\/version>/) {
+               my $ver = $1+time()/1E10;
+               $_=tag(sprintf("%g",$ver),"version")."\n";
+       }
+       # If cover.jpg or cover.png exists, add a coverpage
+       if (/<coverpage \/>/) {
+               $_="";
+               COVER:
+               for $file ("cover.jpg","cover.png") {
+                       if (-f $file) { 
+                               my $id = $file;
+                               $id=~tr/./_/;
+                               $_ = "<coverpage>\n<image l:href=\"#$id\" />\n</coverpage>\n";
+                               $pics .= mkbinary($file,$id);
+                               last COVER;
+                       }
+               }       
+       }
        print $_;
 }
 close F;
-
+print "<body>\n";
 my $header =1;
 LINE:
 while (<>) {
 $environ = undef;
 if (/\\(begin|end){(\w+)}/) {
-       $environ = $2;
+       $environ=$2;
        $begin=$1 eq "begin";
        if ($environ eq 'verbatim') {
                $verbatim=$begin;
@@ -34,68 +64,80 @@ if (/\\(begin|end){(\w+)}/) {
                if ($begin) {
                pushsection("poem",undef);
                } else {
-               flushsection('poem');
+                       add_to_section(tag($buffer,'stanza')."\n") if $buffer;
+                       $buffer="";
+                       flushsection('poem');
                }
                $poetry = $begin;
-       } elsif($environ = 'document' && $begin) {
+       } elsif($environ eq 'document' && $begin) {
                $header=0;
        }
 }
 next LINE if $header;
 if ((/^$/ || $environ) && $buffer) {
 #output on empty line (p or stanza) depending on poetry mode
-       add_to_section(tag($buffer,$poetry?"stanza":"p"));
+       add_to_section(tag(flushbuffer($buffer),$poetry?"stanza":"p")."\n");
        $buffer="";
 }
 next LINE if $environ;
 # Section headings
-if (/\\(part|chapter|section|subsection|subsubsection)\*?{(.*)}/) {
+if (/\\(part|chapter|section|subsection|subsubsection)\*?\{(.*)\}/) {
        if ($buffer) {
-               add_to_section(tag($buffer,$poetry?"stanza":"p"));
+               add_to_section(tag(flushbuffer($buffer),$poetry?"stanza":"p"));
                $buffer="";
        }
-       pushsection($1,$2);
+       pushsection($1,tag($2,"p"));
        next LINE;
 }
-if (/\\vspace{/) {
+if (/\\vspace\{/) {
        add_to_section("<empty-line />");
        next LINE;
 }
+next LINE if /\\pagebreak\b/;
+#replace & ' " < > with XML entities
+s/&/&amp;/g;
+s/'/&apos;/g;
+s/"/&quot;/g;
+s/</&lt;/g;
+s/>/&gt;/g;
 #normal mode: 
 if (!$verbatim) {
 #strip TeX comments 
 s/([^\\])%.*$/$1/;
 s/^%.*$//;
 # strip \sloppy
+s/\\(\w+)\\sloppy/\\$1/g;
 s/\\sloppy\s+//g;
-s/\\sloppy{}//g;
+s/\\sloppy\{\}//g;
 s/\\sloppy([^\w])/$1/g;
+# strip extra space
+s/^\s+//;
+s/\s+$//;
+s/(\s)\s+/$1/g;
 #replace TeX ligatures ~ --- << >> \% with appropriate unicode symbols
-s/~/\xA0/g;
-s/---/-/g;
-s/<</«/g;
-s/>>/»/g;
-s/\\%/%/g;
-s/\\dots/\x{2026}/g;
+$_ = fix_ligatures($_);
 }
-#replace ' and " with entities
-s/&/&amp;/g;
-s/'/&apos;/g;
-s/"/&quot;/g;
-s/</&lt;/g;
-s/>/&gt;/g;
 
 if ($poetry) {
        chomp;
-  $buffer.=tag($_,'v');
+       if ($poetic_buffer) {
+               $_ = $poetic_buffer." ".$_;
+               $poetic_buffer = undef;
+       }
+       if (/{[^}]+$/) {
+               $poetic_buffer=$_;
+               next LINE;
+       }
+       s/\s*\\\\$//;
+  $buffer.=tag(flushbuffer($_),'v')."\n";
 } elsif ($verbatim) {
        add_to_section(tag(tag($_,"code"),"p"));
 } else {
-  $buffer.=$_;
+  $buffer.=" ".$_;
 }
 }
 if ($buffer) {
-       add_to_section(tag($buffer,"p"));
+       add_to_section(tag(flushbuffer($buffer),"p"));
        $buffer="";
 }
 
@@ -103,12 +145,17 @@ while (@sections) {
        flushsection();
 }
 print "</body>\n";
-## FIXME print footnotes
+## print footnotes
+print "<body>\n$footnotes\n</body>" if $footnotes;
+print $pics;
 print "</FictionBook>";
 
 sub add_to_section {
        my $data = shift;
        return if ($#sections<0) ;
+       if ($data =~ /^\s*<section>/ && $sections[$#sections]->{data} !~ /^\s*<section>/) {
+               $sections[$#sections]->{data} = tag($sections[$#sections]->{data},"section")."\n";
+       }       
        $sections[$#sections]->{data}.=$data;
 }
 
@@ -119,9 +166,13 @@ sub flushsection {
        if ($str->{title}) {
                $content = tag($str->{title},"title");
        }
-       $content .=  $str->{data};
+       if ($str->{data} =~ /^\s*$/s) {
+               $content .= "<p>\n</p>";
+       } else {
+               $content .=  $str->{data};
+       }
        if ($#sections >=0) {
-               add_to_section(tag($content,$tag));
+               add_to_section(tag($content,$tag)."\n");
        } else {
                print tag($content,$tag);
        }
@@ -142,11 +193,54 @@ sub pushsection {
        while (scalar(@sections) > $found) {
                flushsection;
        }
-       push @sections,{level=>$level,title=>$title,data=>""};
+       push @sections,{level=>$level,title=>fix_ligatures($title),data=>""};
 }
 
+sub fix_ligatures {
+       local $_=shift;
+       s/~/\xA0/g;
+       s/\\-/\xAD/g;
+       s/---/—/g;
+       s/<</«/g;
+       s/>>/»/g;
+       s/\\%/%/g;
+       s/\\dots/\x{2026}/g;
+       s/\\verb(.)(.*)\1/<code>$2<\/code>/;
+       return $_;
+}      
 sub tag {
        my ($content,$name) = @_;
-       return "" if ($content eq "\n");
+       $content =~s/^\s+//s;
+       return "" unless $content;
        return "<$name>$content</$name>";
 }
+
+sub flushbuffer {
+       local $_ = shift;
+
+       s/\\footnote\{(.*)\}/push_footnote($1)/e;
+       $_.="}" if (/^\s*\{.*?[^}]$/) ;
+       s/\{\\(em|it|bf)(?:\s+| \{\})([^{}]+)}/<emphasis>$2<\/emphasis>/g;
+       s/\{\\(tt)(?:\s+|\{\})([^{}]+)}/<code>$2<\/code>/g;
+       s/\\(emph|textit|textbf)\{([^{}]+)\}/<emphasis>$2<\/emphasis>/g;
+       s/[{}]//g;
+       return $_;
+}
+
+
+sub push_footnote {
+       my $id = "note_".(++$idseq);
+       $footnotes.="<section id=\"$id\">".tag(flushbuffer(shift),'p')."</section>\n";
+       return "<a l:href=\"#$id\" type=\"note\">$idseq</a>";
+}
+
+sub mkbinary {
+       my ($filename,$id) = @_;
+       my $f;
+       open $f,"<",$filename;
+       binmode $f;
+       local $/;
+       my $data = encode_base64(<$f>);
+       return "<binary id=\"$id\" content-type=\"image/png\">$data</binary>\n";
+       close $f;
+}