]> www.wagner.pp.ru Git - fiction/Kate-the-Empress.git/blob - Tex2fb2
added fb2-related stuff to .gitignore and make clean. Added version of Tex2fb2 which...
[fiction/Kate-the-Empress.git] / Tex2fb2
1 #!/usr/bin/perl -CDS
2 use utf8;
3 use POSIX qw(strftime);
# Conversion state shared by the main loop and the helper subs below.
my $poetry = 0;      # true while inside a verse environment
my $verbatim = 0;    # true while inside a verbatim environment
my @sections;        # stack of open sections: { level, title, data }
my $buffer;          # text of the paragraph/stanza currently being collected
my $idseq = 0;       # sequential number of footnotes
my $footnotes = "";  # accumulated XML of all footnote sections
#
# TODO italic paragraphs
# footnotes
# epigraphs
#
# Print the FictionBook header.
print "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
print "<FictionBook xmlns=\"http://www.gribuser.ru/xml/fictionbook/2.0\"\n"
    . "xmlns:l=\"http://www.w3.org/1999/xlink\">\n";

# The first command-line argument names a file that is copied to the output
# ahead of <body>; the remaining arguments are read by the main loop via <>.
my $metadata = shift @ARGV;
defined $metadata
    or die "Usage: $0 metadata-file [tex-file ...]\n";
# Fail loudly: without the metadata the output would silently lack everything
# this file is supposed to contribute.
open my $meta_fh, "<", $metadata
    or die "Cannot open metadata file '$metadata': $!\n";
my @now = localtime();
while (<$meta_fh>) {
    # Replace an empty date element with the current date.
    if (/<date\s*\/>/ || /<date>\s*<\/date>/) {
        $_ = "<date value=\"" . strftime("%Y-%m-%d", @now) . "\">"
            . strftime("%d/%m/%Y", @now) . "</date>\n";
    }
    # Add the current time as a fractional part to the integer part of the
    # version.  The dot is escaped so only a real decimal point is accepted;
    # an empty integer part counts as 0.
    if (/<version>(\d*)(?:\.\d*)?<\/version>/) {
        my $ver = ($1 || 0) + time() / 1E10;
        $_ = tag(sprintf("%g", $ver), "version") . "\n";
    }
    print $_;
}
close $meta_fh;
print "<body>\n";
# True until \begin{document} is seen: the LaTeX preamble produces no output.
my $header = 1;
# Verse line that ended inside an unclosed {...} group; it is joined with the
# next input line before being processed.
my $poetic_buffer;
LINE:
while (<>) {
    # Environment named by a \begin{...}/\end{...} on this line (if any) and
    # whether it is being opened.  Literal braces are escaped in all patterns
    # because newer perls deprecate or reject unescaped "{".
    my ($environ, $begin);
    if (/\\(begin|end)\{(\w+)\}/) {
        ($begin, $environ) = ($1 eq "begin", $2);
    }

    # A blank line or an environment boundary ends the pending paragraph or
    # stanza.  This is done BEFORE the mode switch below so that text directly
    # preceding \begin{verse} is emitted as a paragraph outside the poem, and
    # the last stanza before \end{verse} goes through flushbuffer like every
    # other stanza.  ($buffer is always empty while $header is set.)
    if ((/^$/ || defined $environ) && $buffer) {
        add_to_section(tag(flushbuffer($buffer), $poetry ? "stanza" : "p") . "\n");
        $buffer = "";
    }

    if (defined $environ) {
        if ($environ eq 'verbatim') {
            $verbatim = $begin;
        } elsif ($environ eq 'verse') {
            if ($begin) {
                pushsection("poem", undef);
            } else {
                flushsection('poem');
            }
            $poetry = $begin;
        } elsif ($environ eq 'document' && $begin) {
            $header = 0;
        }
        # Environment lines themselves never produce text.
        next LINE;
    }
    next LINE if $header;

    # Replace XML special characters with entities.  This happens before
    # anything from the line (including section titles) is copied to output.
    s/&/&amp;/g;
    s/'/&apos;/g;
    s/"/&quot;/g;
    s/</&lt;/g;
    s/>/&gt;/g;

    # Normal mode:
    if (!$verbatim) {
        # strip TeX comments
        s/([^\\])%.*$/$1/;
        s/^%.*$//;
        # strip \sloppy
        s/\\sloppy\s+//g;
        s/\\sloppy\{\}//g;
        s/\\sloppy([^\w])/$1/g;
        # strip extra space
        s/^\s+//;
        s/\s+$//;
        s/(\s)\s+/$1/g;
        # replace TeX ligatures ~ \- --- << >> \% with the appropriate
        # Unicode symbols
        s/~/\xA0/g;
        s/\\-/\xAD/g;
        s/---/—/g;
        # "<" and ">" were already turned into entities above, so the
        # guillemet ligatures have to be matched in their escaped form.
        s/&lt;&lt;/«/g;
        s/&gt;&gt;/»/g;
        s/\\%/%/g;
        s/\\dots/\x{2026}/g;
        s/\\verb(.)(.*)\1/<code>$2<\/code>/;
    }

    # Section headings
    if (/\\(part|chapter|section|subsection|subsubsection)\*?\{(.*)\}/) {
        my ($level, $title) = ($1, $2);
        if ($buffer) {
            add_to_section(tag(flushbuffer($buffer), $poetry ? "stanza" : "p") . "\n");
            $buffer = "";
        }
        pushsection($level, tag($title, "p"));
        next LINE;
    }
    if (/\\vspace\{/) {
        add_to_section("<empty-line />");
        next LINE;
    }
    next LINE if /\\pagebreak\b/;

    if ($poetry) {
        chomp;
        if ($poetic_buffer) {
            $_ = $poetic_buffer . " " . $_;
            $poetic_buffer = undef;
        }
        # A {...} group still open at end of line: wait for the next line.
        if (/\{[^}]+$/) {
            $poetic_buffer = $_;
            next LINE;
        }
        # Each footnote is matched separately (one level of nested braces is
        # allowed in its text) so that several footnotes on one verse line
        # do not merge into a single note.
        s/\\footnote\{((?:[^{}]|\{[^{}]*\})*)\}/push_footnote($1)/ge;
        # drop the trailing TeX line break
        s/\s*\\\\$//;
        $buffer .= tag($_, 'v') . "\n";
    } elsif ($verbatim) {
        add_to_section(tag(tag($_, "code"), "p"));
    } else {
        $buffer .= " " . $_;
    }
}
# End of input: emit whatever paragraph text is still pending ...
if ($buffer) {
    add_to_section(tag(flushbuffer($buffer), "p"));
    $buffer = "";
}

# ... and close every section that is still open, innermost first.
while (@sections) {
    flushsection();
}
print "</body>\n";
## print footnotes
# The footnote sections go into a second body.  It is marked name="notes",
# which is the FictionBook convention for the body that holds the targets of
# the type="note" links emitted by push_footnote.
print "<body name=\"notes\">\n$footnotes\n</body>\n" if $footnotes;
print "</FictionBook>\n";
141
# Append an XML fragment to the innermost open section.
#   $data - text to append.
# Does nothing when no section is open.  When a nested <section> is about to
# be appended to content that does not itself start with <section>, the
# existing content is first wrapped in a <section> of its own (presumably
# because FB2 does not allow paragraphs and sections as siblings - confirm).
sub add_to_section {
    my $data = shift;
    return unless @sections;
    my $current = $sections[-1];
    if ($data =~ /^\s*<section>/ and $current->{data} !~ /^\s*<section>/) {
        $current->{data} = tag($current->{data}, "section") . "\n";
    }
    $current->{data} .= $data;
}
150
# Close the innermost open section and emit it.
#   $tag - element name to wrap the section in (default 'section').
# The section's title (if any) and data are wrapped in <$tag>; a section
# whose data is only whitespace gets a placeholder paragraph instead.  The
# result is appended to the enclosing section, or printed when the closed
# section was the outermost one.
sub flushsection {
    my $tag = shift || 'section';
    my $closed = pop @sections;
    my $title = $closed->{title} ? tag($closed->{title}, "title") : "";
    my $body = $closed->{data} =~ /^\s*$/s ? "<p>\n</p>" : $closed->{data};
    my $xml = tag($title . $body, $tag);
    if (@sections) {
        add_to_section($xml . "\n");
    } else {
        print $xml;
    }
}
169
# Open a new section.
#   $level - sectioning level name (e.g. 'chapter', 'section', 'poem').
#   $title - already formatted title XML, or undef for no title.
# If a section of the same level is already open, that section and everything
# nested inside it are closed first; the new section is then pushed on top of
# the stack.
sub pushsection {
    my ($level, $title) = @_;
    # Number of stack entries to keep: everything above the first open
    # section of the same level, or the whole stack if there is none.
    my $keep = @sections;
    for my $i (0 .. $#sections) {
        next unless $sections[$i]->{level} eq $level;
        $keep = $i;
        last;
    }
    flushsection() while @sections > $keep;
    push @sections, { level => $level, title => $title, data => "" };
}
187
# Wrap text in an XML element.
#   $content - element content; leading whitespace is removed.
#   $name    - element name.
# Returns "<$name>$content</$name>", or the empty string when the content is
# undefined or empty after stripping.  Emptiness is tested by length, not by
# truth, so that content consisting of the single character "0" is kept.
sub tag {
    my ($content, $name) = @_;
    return "" unless defined $content;
    $content =~ s/^\s+//s;
    return "" unless length $content;
    return "<$name>$content</$name>";
}
194
# Convert the inline TeX markup of a collected paragraph/stanza to FB2.
#   argument - the collected text.
# Returns the text with {\em ...}/{\it ...}/{\bf ...} groups and
# \emph{}/\textit{}/\textbf{} commands turned into <emphasis>, every
# \footnote{...} replaced by the link returned from push_footnote, and all
# remaining braces removed.
sub flushbuffer {
    local $_ = shift;
    # Literal braces are escaped: newer perls deprecate or reject an
    # unescaped "{" in a pattern.  The substitutions are repeated until
    # nothing changes so that nested emphasis is converted from the inside
    # out (each pass removes a pair of braces, so the loop terminates).
    1 while s/\{\\(?:em|it|bf)(?:\s+|\{\})([^{}]+)\}/<emphasis>$1<\/emphasis>/g
         || s/\\(?:emph|textit|textbf)\{([^{}]+)\}/<emphasis>$1<\/emphasis>/g;
    # Match each footnote on its own (allowing one level of nested braces in
    # its text) instead of greedily swallowing everything up to the last "}"
    # of the paragraph, which merged several footnotes into one.
    s/\\footnote\{((?:[^{}]|\{[^{}]*\})*)\}/push_footnote($1)/ge;
    s/[{}]//g;
    return $_;
}
203
204
# Register a footnote and return the inline reference to it.
#   argument - footnote text.
# Increments the footnote counter, appends a <section id="note_N"> holding
# the text as a paragraph to $footnotes, and returns an <a> element of
# type "note" that links to that section and shows the number N.
sub push_footnote {
    my $text = shift;
    $idseq++;
    my $id = "note_" . $idseq;
    $footnotes .= qq{<section id="$id">} . tag($text, 'p') . "</section>\n";
    return qq{<a l:href="#$id" type="note">$idseq</a>};
}