]> www.wagner.pp.ru Git - fiction/Kate-the-Empress.git/blob - Tex2fb2
Typo fixes
[fiction/Kate-the-Empress.git] / Tex2fb2
1 #!/usr/bin/perl -CDS
2
3 use utf8;
4 use POSIX qw(strftime);
5 use MIME::Base64;
6 # char-level modes
7 my $poetry = 0;
8 my $verbatim = 0;
9 my @sections;
10 my $buffer;
11 my $idseq = 0; # sequentual number of footnotes
12 my $footnotes="";
13 #
14 # TODO italic paragaphs
15 # footnotes
16 # epigraphs
17 #
18 # print fictionbook header
19 print "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
20 print "<FictionBook xmlns=\"http://www.gribuser.ru/xml/fictionbook/2.0\"
21 xmlns:l=\"http://www.w3.org/1999/xlink\">\n";
22 my $metadata = shift @ARGV;
23 open F,"<",$metadata;
24 my $pics = "";
25 while (<F>) {
26         # Replace empty date with current date
27         if (/<date\s+\/>/ || /<date>\s*<\/date>/) {
28                 $_ = "<date value=\"".strftime("%Y-%m-%d",localtime())."\">".
29                         strftime("%d/%m/%Y",localtime())."</date>\n";
30         }
31         # Add current to date as fractional part to version
32         if (/<version>(\d*)(.\d*)?<\/version>/) {
33                 my $ver = $1+time()/1E10;
34                 $_=tag(sprintf("%g",$ver),"version")."\n";
35         }
36         # Если  существует cover.png, добавляем coverpage
37         if (/<coverpage \/>/) {
38                 $_="";
39                 COVER:
40                 for $file ("cover.jpg","cover.png") {
41                         if (-f $file) { 
42                                 my $id = $file;
43                                 $id=~tr/./_/;
44                                 $_ = "<coverpage>\n<image l:href=\"#$id\" />\n</coverpage>\n";
45                                 $pics .= mkbinary($file,$id);
46                                 last COVER;
47                         }
48                 }       
49         }
50         print $_;
51 }
52 close F;
53 print "<body>\n";
54 my $header =1;
55 LINE:
56 while (<>) {
57 $environ = undef;
58 if (/\\(begin|end){(\w+)}/) {
59         $environ=$2;
60         $begin=$1 eq "begin";
61         if ($environ eq 'verbatim') {
62                 $verbatim=$begin;
63         } elsif ($environ eq 'verse') {
64                 if ($begin) {
65                 pushsection("poem",undef);
66                 } else {
67                         add_to_section(tag($buffer,'stanza')."\n") if $buffer;
68                         $buffer="";
69                         flushsection('poem');
70                 }
71                 $poetry = $begin;
72         } elsif($environ eq 'document' && $begin) {
73                 $header=0;
74         }
75 }
76 next LINE if $header;
77 if ((/^$/ || $environ) && $buffer) {
78 #output on empty line (p or stanza) depending on poetry mode
79         add_to_section(tag(flushbuffer($buffer),$poetry?"stanza":"p")."\n");
80         $buffer="";
81 }
82 next LINE if $environ;
83 # Section headings
84 if (/\\(part|chapter|section|subsection|subsubsection)\*?\{(.*)\}/) {
85         if ($buffer) {
86                 add_to_section(tag(flushbuffer($buffer),$poetry?"stanza":"p"));
87                 $buffer="";
88         }
89         pushsection($1,tag($2,"p"));
90         next LINE;
91 }
92 if (/\\vspace\{/) {
93         add_to_section("<empty-line />");
94         next LINE;
95 }
96 next LINE if /\\pagebreak\b/;
97 #replace ' and " with entities
98 s/&/&amp;/g;
99 s/'/&apos;/g;
100 s/"/&quot;/g;
101 s/</&lt;/g;
102 s/>/&gt;/g;
103 #normal mode: 
104 if (!$verbatim) {
105 #strip TeX comments 
106 s/([^\\])%.*$/$1/;
107 s/^%.*$//;
108 # strip \sloppy
109 s/\\(\w+)\\sloppy/\\$1/g;
110 s/\\sloppy\s+//g;
111 s/\\sloppy\{\}//g;
112 s/\\sloppy([^\w])/$1/g;
113 # strip extra space
114 s/^\s+//;
115 s/\s+$//;
116 s/(\s)\s+/$1/g;
117 #replace TeX ligatures ~ --- << >> \% with appropriate unicode symbols
118 $_ = fix_ligatures($_);
119 }
120
121 if ($poetry) {
122         chomp;
123         if ($poetic_buffer) {
124                 $_ = $poetic_buffer." ".$_;
125                 $poetic_buffer = undef;
126         }
127         if (/{[^}]+$/) {
128                 $poetic_buffer=$_;
129                 next LINE;
130         }
131         s/\s*\\\\$//;
132   $buffer.=tag(flushbuffer($_),'v')."\n";
133 } elsif ($verbatim) {
134         add_to_section(tag(tag($_,"code"),"p"));
135 } else {
136   $buffer.=" ".$_;
137 }
138 }
139 if ($buffer) {
140         add_to_section(tag(flushbuffer($buffer),"p"));
141         $buffer="";
142 }
143
144 while (@sections) {
145         flushsection();
146 }
147 print "</body>\n";
148 ## print footnotes
149 print "<body>\n$footnotes\n</body>" if $footnotes;
150 print $pics;
151 print "</FictionBook>";
152
153 sub add_to_section {
154         my $data = shift;
155         return if ($#sections<0) ;
156         if ($data =~ /^\s*<section>/ && $sections[$#sections]->{data} !~ /^\s*<section>/) {
157                 $sections[$#sections]->{data} = tag($sections[$#sections]->{data},"section")."\n";
158         }       
159         $sections[$#sections]->{data}.=$data;
160 }
161
162 sub flushsection {
163         my $tag= shift || 'section';
164         my $str = pop @sections;
165         my $content="";
166         if ($str->{title}) {
167                 $content = tag($str->{title},"title");
168         }
169         if ($str->{data} =~ /^\s*$/s) {
170                 $content .= "<p>\n</p>";
171         } else {
172                 $content .=  $str->{data};
173         }
174         if ($#sections >=0) {
175                 add_to_section(tag($content,$tag)."\n");
176         } else {
177                 print tag($content,$tag);
178         }
179 }
180
181 sub pushsection {
182         my ($level,$title)=@_;
183         # Find section of $level in the current stack
184         my $found=scalar(@sections);
185         LEVEL:
186         for (my $i=0;$i<=$#sections;$i++) {
187                 if ($sections[$i]->{level} eq $level) {
188                         $found=$i;
189                         last LEVEL;
190                 }
191         }
192         # if found, flush everything below
193         while (scalar(@sections) > $found) {
194                 flushsection;
195         }
196         push @sections,{level=>$level,title=>fix_ligatures($title),data=>""};
197 }
198
199 sub fix_ligatures {
200         local $_=shift;
201         s/~/\xA0/g;
202         s/\\-/\xAD/g;
203         s/---/—/g;
204         s/<</«/g;
205         s/>>/»/g;
206         s/\\%/%/g;
207         s/\\dots/\x{2026}/g;
208         s/\\verb(.)(.*)\1/<code>$2<\/code>/;
209         return $_;
210 }       
211 sub tag {
212         my ($content,$name) = @_;
213         $content =~s/^\s+//s;
214         return "" unless $content;
215         return "<$name>$content</$name>";
216 }
217
218 sub flushbuffer {
219         local $_ = shift;
220
221         s/\\footnote\{(.*)\}/push_footnote($1)/e;
222         $_.="}" if (/^\s*\{.*?[^}]$/) ;
223         s/\{\\(em|it|bf)(?:\s+| \{\})([^{}]+)}/<emphasis>$2<\/emphasis>/g;
224         s/\{\\(tt)(?:\s+|\{\})([^{}]+)}/<code>$2<\/code>/g;
225         s/\\(emph|textit|textbf)\{([^{}]+)\}/<emphasis>$2<\/emphasis>/g;
226         s/[{}]//g;
227         return $_;
228 }
229
230
231 sub push_footnote {
232         my $id = "note_".(++$idseq);
233         $footnotes.="<section id=\"$id\">".tag(flushbuffer(shift),'p')."</section>\n";
234         return "<a l:href=\"#$id\" type=\"note\">$idseq</a>";
235 }
236
237 sub mkbinary {
238         my ($filename,$id) = @_;
239         my $f;
240         open $f,"<",$filename;
241         binmode $f;
242         local $/;
243         my $data = encode_base64(<$f>);
244         return "<binary id=\"$id\" content-type=\"image/png\">$data</binary>\n";
245         close $f;
246 }