]> www.wagner.pp.ru Git - oss/ljdump.git/blob - ljmkstatic
Added converter of dump into static pages
[oss/ljdump.git] / ljmkstatic
1 #!/usr/bin/python
2 # -*- encoding: utf-8 -*-
3 """
4 Formats the post (L-nnnn) and comment (C-nnnn) files downloaded by
5 ljdump.py into static HTML pages using templates.
6
7 Syntax: ljmkstatic   (no arguments; settings are read from ljmkstatic.conf)
8
9 """
10 from ConfigParser import ConfigParser
11 import xml.dom.minidom, xml.dom
12 import re
13 import sys,os,glob
14 import codecs
# Configuration parameters:
#   - directory for symlinks
#   - URL of the "little man" (user info) picture
#   - directory with the dump results
#   - directory for the picture
#   - template for the post page
#   - template for a comment block

# Template texts keyed by 'post' and 'comment'; filled in by read_templates().
template = dict()
# Options of the [urls] config section; filled in by set_parameters().
urls = dict()
# Options of the [directories] config section; filled in by set_parameters().
dirs = dict()
def read_templates(config):
    """
    Load the page templates named in the [templates] config section.

    For each of the options 'post' and 'comment' the option value is
    taken as a file name; that file is read as UTF-8 and its whole
    content is stored in the module-level ``template`` dictionary under
    the option name.
    """
    for kind in ('post', 'comment'):
        path = config.get('templates', kind)
        with codecs.open(path, 'r', 'utf-8') as source:
            template[kind] = source.read()
30
31
def set_parameters(config):
    """
    Copy the [urls] and [directories] config sections into the
    module-level ``urls`` and ``dirs`` dictionaries.

    Every option of a section becomes a key of the corresponding
    dictionary; keys already present are overwritten, other keys are
    left alone.
    """
    for section, target in (('urls', urls), ('directories', dirs)):
        for option in config.options(section):
            target[option] = config.get(section, option)
38
def process_ljtag(m):
    """
    Replacement callback for re.sub: turn one matched lj tag into HTML.

    m is the match object; the whole matched text (group 0) is the tag.

    Returns
      - "" for any tag containing "lj-cut" (cuts are simply removed),
      - a link to the user's LiveJournal profile, with the user-info
        icon taken from urls['icons'], for a tag that carries a
        user=NAME attribute; the link text is the title= attribute if
        present, otherwise the user name,
      - "" for any other tag, after printing a diagnostic.

    A tag with "user=" but no usable name (for example <lj user="">)
    is reported as unknown instead of raising AttributeError, and
    every path returns a string.
    """
    tag = m.group(0)
    if "lj-cut" in tag:
        return ""
    if 'user=' in tag:
        name_match = re.search(r'user=[\'"]?(\w+)[\'"]?', tag)
        if name_match:
            name = name_match.group(1)
            title_match = re.search(r'title=["\']?([^"\'>]+)[\'"]?', tag)
            if title_match:
                title = title_match.group(1)
            else:
                title = name
            return '<a style="color: blue; font-weight: bold;" href="http://www.livejournal.com/users/%s/profile"><img src="%s">%s</a>' % (name, urls['icons'] + "/userinfo.gif", title)
    # Single-argument print() produces the same output on Python 2 and 3.
    print("unknown lj tag:  %s" % tag)
    return ""
58
def process_text(text):
    """
    Convert the raw text of a post or comment into HTML.

    Every lj tag (anything matching </?lj[^>]+>) is replaced by the
    result of process_ljtag, then each line break (LF or CRLF) is
    replaced by <br>.

    Returns the converted text.  If the lj-tag substitution fails, the
    offending text is printed and the original exception is re-raised.
    """
    # Take the text and treat it as html, replacing the lj tags
    try:
        text = re.sub(r"</?lj[^>]+>", process_ljtag, text)
    except Exception:
        print('bad text : %s' % text)
        # Bare raise keeps the traceback of the original failure.
        raise
    text = re.sub("\r?\n", "<br>", text)
    # ... and replacing img with local copies when they exist.  If they
    # do not, write them to the picture cache
    # FIXME post_props[post_text] = re.sub("<img # [^>]+>",process_img.post_text)
    return text
def format_comments(cmt_list):
    """
    Render a list of comment dictionaries (and, recursively, their
    replies) with the 'comment' template.

    Each dictionary is updated in place before rendering:
      - 'comments' receives the rendered replies from 'children', or ''
        when there are none,
      - 'userlink' is set to the HTML produced for <lj user="..."> when
        the comment has a 'user' key.

    Returns the concatenation of all rendered comments.
    """
    rendered = []
    for entry in cmt_list:
        replies = entry['children']
        entry['comments'] = format_comments(replies) if len(replies) else ''
        if 'user' in entry:
            entry['userlink'] = process_text('<lj user="%s">' % entry['user'])
        rendered.append(template['comment'] % entry)
    return ''.join(rendered)
82
def _element_children(node):
    """Return the child nodes of node that are XML elements."""
    return [child for child in node.childNodes
            if child.nodeType == xml.dom.Node.ELEMENT_NODE]


def _store_element_text(props, element):
    """
    Store the value of element's first child in props under the
    element's name.  Empty elements are skipped, so a default already
    present in props is kept.
    """
    first = element.firstChild
    if first is not None:
        props[str(element.nodeName)] = first.nodeValue


def _read_comment_tree(commentfile):
    """
    Parse a C-nnnn file.  Returns (top-level comments, total number of
    comments); replies are attached to the 'children' list of their
    parent comment.
    """
    comment_xml = xml.dom.minidom.parse(commentfile)
    # We suppose that comments are already sorted according to post
    # time, so a parent is always seen before its replies.
    comment_tree = []
    comment_hash = {}
    comment_count = 0
    for c in _element_children(comment_xml.documentElement):
        if c.nodeName != 'comment':
            continue
        comment = {'date':'Unknown','children':[],'subject':'','userlink':'(Anonymous)'}
        for field in _element_children(c):
            if field.nodeName == 'body':
                if field.firstChild is None:
                    comment['body'] = '<b>Deleted comment</b>'
                else:
                    comment['body'] = process_text(field.firstChild.nodeValue)
            else:
                _store_element_text(comment, field)
        comment_hash[comment['id']] = comment
        # Count every comment, not only the replies.
        comment_count += 1
        if 'parentid' in comment and comment['parentid'] in comment_hash:
            comment_hash[comment['parentid']]['children'].append(comment)
        else:
            comment_tree.append(comment)
    return comment_tree, comment_count


def do_post(postfile,commentfile,outputfile):
    """
    Render one post, with its comments if any, into a static HTML page.

    postfile    -- path of the L-nnnn XML file holding the post
    commentfile -- path of the C-nnnn XML file holding the comments, or
                   a false value when the post has no comment file
    outputfile  -- path of the HTML file to write (UTF-8)

    The page is produced by applying the 'post' template to a
    dictionary built from the post's elements, the children of its
    <props> element, and the keys 'text', 'comments', 'comment_count'
    and (when a picture file is found) 'userpic'.

    Returns the tuple (logtime, ditemid, subject, taglist) of the post.

    Raises ValueError if the post has no <event> element, and KeyError
    if the post lacks logtime or ditemid or the template refers to a
    key that is not available.
    """
    # Read L-nnnn
    post_xml = xml.dom.minidom.parse(postfile)
    post_props = {'subject':'','taglist':''}
    for n in _element_children(post_xml.documentElement):
        if n.nodeName == u'event':
            body = n.firstChild
            if body is None:
                # An empty <event/> is a post without text, not an error.
                post_props['text'] = ''
            else:
                post_props['text'] = process_text(body.nodeValue)
        elif n.nodeName == u'props':
            # Extract the needed metadata
            for n2 in _element_children(n):
                _store_element_text(post_props, n2)
        else:
            _store_element_text(post_props, n)

    if 'text' not in post_props:
        raise ValueError("No event node in ths post")
    userpic = post_props.get('picture_keyword', '_')
    # NOTE(review): 'userpic' stays unset when no picture file is
    # readable, so a template using %(userpic)s would raise KeyError;
    # the file is looked up directly in dirs['archive'] while the URL
    # points below /userpics/ -- confirm both are intended.
    for fmt in ('jpg','gif','png'):
        if os.access("%s/%s.%s" % (dirs['archive'],userpic,fmt),os.R_OK):
            post_props['userpic'] = '%s/userpics/%s.%s' % (urls['images'],userpic,fmt)
            break
    if commentfile:
        comment_tree, comment_count = _read_comment_tree(commentfile)
        post_props['comments'] = format_comments(comment_tree)
        post_props['comment_count'] = comment_count
    else:
        post_props['comments'] = ''
        post_props['comment_count'] = 0
    page = template['post'] % post_props

    with codecs.open(outputfile,"w","utf-8") as f:
        f.write(page)
    return (post_props['logtime'],post_props['ditemid'],post_props['subject'],post_props['taglist'])
151
152
153
154
# Script entry point: convert every post of the dump directory whose
# HTML page is missing or older than its post/comment files.
if __name__ == '__main__':
    config = ConfigParser()
    # read() returns the list of files it could parse; an empty list
    # means the configuration was not found.  (Comparing that list with
    # an integer, as before, never reported the problem.)
    if not config.read(["ljmkstatic.conf"]):
        raise ValueError("No config file found")
    read_templates(config)
    set_parameters(config)
    for post_file in sorted(glob.glob(dirs['dump'] + "/L-*")):
        id_match = re.search(r"(\d+)$", post_file)
        if id_match is None:
            # Matches L-* but does not end in a post number (for
            # example an editor backup file): not a dump file, skip it.
            continue
        post_id = id_match.group(1)
        comment_file = dirs['dump'] + "/C-" + post_id
        outfile = dirs['dump'] + "/" + post_id + ".html"
        try:
            t1 = os.stat(post_file).st_mtime
            try:
                t2 = os.stat(comment_file).st_mtime
            except OSError:
                # No comment file for this post.
                t2 = 0
                comment_file = None
            t3 = os.stat(outfile).st_mtime
            if t3 > t1 and t3 > t2:
                # The page is newer than both sources: up to date.
                continue
        except OSError:
            # Typically the output page does not exist yet: build it.
            pass
        print("Processing post L-%s" % post_id)
        (date,post_id,subject,tags) = do_post(post_file,comment_file,outfile)
        # Fix me - update index structures
180