]> www.wagner.pp.ru Git - fiction/Kate-the-Empress.git/blob - Tex2fb2
fixed translation of verse to poem
[fiction/Kate-the-Empress.git] / Tex2fb2
1 #!/usr/bin/perl -CDS
2 use utf8;
3 # char-level modes
4 my $poetry = 0;
5 my $verbatim = 0;
6 my @sections;
7 my $buffer;
8 #
9 # TODO italic paragaphs
10 # footnotes
11 # epigraphs
12 #
13 # print fictionbook header
14 print "<?xml version=\"1.0\" encoding=\"UTF-8\">\n";
15 print "<FictionBook xmlns=\"http://www.gribuser.ru/xml/fictionbook/2.0\"
16 xmlns:l=\"http://www.w3.org/1999/xlink\">\n";
17 my $metadata = shift @ARGV;
18 open F,"<",$metadata;
19 while (<F>) {
20         print $_;
21 }
22 close F;
23
24 my $header =1;
25 LINE:
26 while (<>) {
27 $environ = undef;
28 if (/\\(begin|end){(\w+)}/) {
29         $environ=$2;
30         $begin=$1 eq "begin";
31         if ($environ eq 'verbatim') {
32                 $verbatim=$begin;
33         } elsif ($environ eq 'verse') {
34                 if ($begin) {
35                 pushsection("poem",undef);
36                 } else {
37                         add_to_section(tag($buffer,'stanza')."\n") if $buffer;
38                         $buffer="";
39                         flushsection('poem');
40                 }
41                 $poetry = $begin;
42         } elsif($environ eq 'document' && $begin) {
43                 $header=0;
44         }
45 }
46 next LINE if $header;
47 if ((/^$/ || $environ) && $buffer) {
48 #output on empty line (p or stanza) depending on poetry mode
49         add_to_section(tag($buffer,$poetry?"stanza":"p")."\n");
50         $buffer="";
51 }
52 next LINE if $environ;
53 # Section headings
54 if (/\\(part|chapter|section|subsection|subsubsection)\*?{(.*)}/) {
55         if ($buffer) {
56                 add_to_section(tag($buffer,$poetry?"stanza":"p"));
57                 $buffer="";
58         }
59         pushsection($1,$2);
60         next LINE;
61 }
62 if (/\\vspace{/) {
63         add_to_section("<empty-line />");
64         next LINE;
65 }
66 next LINE if /\\pagebreak\b/;
67 #normal mode: 
68 if (!$verbatim) {
69 #strip TeX comments 
70 s/([^\\])%.*$/$1/;
71 s/^%.*$//;
72 # strip \sloppy
73 s/\\sloppy\s+//g;
74 s/\\sloppy{}//g;
75 s/\\sloppy([^\w])/$1/g;
76 # strip extra space
77 s/^\s+//;
78 s/\s+$//;
79 s/(\s)\s+/$1/g;
80 #replace TeX ligatures ~ --- << >> \% with appropriate unicode symbols
81 s/~/\xA0/g;
82 s/---/-/g;
83 s/<</«/g;
84 s/>>/»/g;
85 s/\\%/%/g;
86 s/\\dots/\x{2026}/g;
87 }
88 #replace ' and " with entities
89 s/&/&amp;/g;
90 s/'/&apos;/g;
91 s/"/&quot;/g;
92 s/</&lt;/g;
93 s/>/&gt;/g;
94
95 if ($poetry) {
96         chomp;
97         s/\s*\\\\$//;
98   $buffer.=tag($_,'v')."\n";
99 } elsif ($verbatim) {
100         add_to_section(tag(tag($_,"code"),"p"));
101 } else {
102   $buffer.=" ".$_;
103 }
104 }
105 if ($buffer) {
106         add_to_section(tag($buffer,"p"));
107         $buffer="";
108 }
109
110 while (@sections) {
111         flushsection();
112 }
113 print "</body>\n";
114 ## FIXME print footnotes
115 print "</FictionBook>";
116
117 sub add_to_section {
118         my $data = shift;
119         return if ($#sections<0) ;
120         $sections[$#sections]->{data}.=$data;
121 }
122
123 sub flushsection {
124         my $tag= shift || 'section';
125         my $str = pop @sections;
126         my $content="";
127         if ($str->{title}) {
128                 $content = tag($str->{title},"title");
129         }
130         $content .=  $str->{data};
131         if ($#sections >=0) {
132                 add_to_section(tag($content,$tag)."\n");
133         } else {
134                 print tag($content,$tag);
135         }
136 }
137
138 sub pushsection {
139         my ($level,$title)=@_;
140         # Find section of $level in the current stack
141         my $found=scalar(@sections);
142         LEVEL:
143         for (my $i=0;$i<=$#sections;$i++) {
144                 if ($sections[$i]->{level} eq $level) {
145                         $found=$i;
146                         last LEVEL;
147                 }
148         }
149         # if found, flush everything below
150         while (scalar(@sections) > $found) {
151                 flushsection;
152         }
153         push @sections,{level=>$level,title=>$title,data=>""};
154 }
155
156 sub tag {
157         my ($content,$name) = @_;
158         return "" if $content =~ /^\s*$/s;
159         return "<$name>$content</$name>";
160 }