ljmkstatic

   1 #!/usr/bin/python
   2 # -*- encoding: utf-8 -*-
   3 """
Formats the post (L-nnnn) and comment (C-nnnn) files downloaded by
ljdump.py into static HTML pages using templates.

Syntax: ljmkstatic (no arguments; settings come from ljmkstatic.conf)
   8
   9 """
  10 from ConfigParser import ConfigParser
  11 import xml.dom.minidom, xml.dom
  12 import re
  13 import sys,os,glob
  14 import codecs
# Configuration parameters:
#   directory for symlinks
#   URL of the "little man" (user info) picture
#   directory with the dump results
#   directory for the picture
#   template for the post page
#   template for the comment block
# Template texts keyed by kind ('post', 'comment'); filled by read_templates().
template={}
# Options of the [urls] config section; filled by set_parameters().
urls={}
# Options of the [directories] config section; filled by set_parameters().
dirs={}
  25 def read_templates(config):
  26     global template
  27     for i in ['post','comment']:
  28         with codecs.open(config.get('templates',i),'r','utf-8') as f:
  29             template[i]=f.read()
  30
  31
  32 def set_parameters(config):
  33     global urls,dirs
  34     for i in config.options('urls'):
  35         urls[i]=config.get('urls',i)
  36     for i in config.options('directories'):
  37         dirs[i]=config.get('directories',i)
  38
  39 def process_ljtag(m):
  40     """
  41     Receives lj tag match object with lj tag and returns
  42     html text which should be used as replacement
  43     Used to pass into re.sub
  44     """
  45     tag = m.group(0)
  46     if tag.find("lj-cut")!=-1:
  47         return ""
  48     if tag.find('user=')!=-1:
  49         name= re.search('user=[\'\"]?(\w+)[\'\"]?',tag).group(1)
  50         title = re.search('title=[\"\']?([^"\'>]+)[\'\"]?',tag)
  51         if title:
  52             title = title.group(1)
  53         else:
  54             title=name
  55         tag= '<a style="color: blue; font-weight: bold;" href="http://www.livejournal.com/users/%s/profile"><img src="%s">%s</a>'%(name,urls['icons']+"/userinfo.gif",title)
  56         return tag
  57     print "unknown lj tag: ",tag
  58
  59 def process_text(text):
  60       # Выделить оттуда текст, распарсить как html, заменяя lj-тэги
  61       try:
  62         text = re.sub("</?lj[^>]+>",process_ljtag,text)
  63       except Exception as e:
  64         print 'bad text :',text
  65         raise e
  66       text = re.sub("\r?\n","<br>",text)
  67       # и заменяя img на локальные копии, если они есть. Если нет, писать в
  68       # кеш картинок
  69       # FIXME post_props[post_text] = re.sub("<img # [^>]+>",process_img.post_text)
  70       return text
  71 def format_comments(cmt_list):
  72     out=[]
  73     for cmt in cmt_list:
  74         if len(cmt['children']):
  75             cmt['comments']=format_comments(cmt['children'])
  76         else:
  77             cmt['comments']=''
  78         if 'user' in cmt:
  79             cmt['userlink']=process_text('<lj user="%s">'%cmt['user'])
  80         out.append(template['comment'] % cmt)
  81     return ''.join(out)
  82
  83 def do_post(postfile,commentfile,outputfile):
  84     """
  85     Handles one post. Returns post date, url, subject and tag list
  86     """
  87 # Прочитать L-nnnn
  88     post_xml = xml.dom.minidom.parse(postfile)
  89     post_props = {'subject':'','taglist':''}
  90     for n in post_xml.documentElement.childNodes:
  91         if n.nodeType == xml.dom.Node.ELEMENT_NODE:
  92             if n.nodeName == u'event':
  93                 post_props['text']=process_text(n.firstChild.nodeValue)
  94             elif n.nodeName == u'props':
  95             # Выделить необходимую метаинформацию
  96                 for n2 in n.childNodes:
  97                     if n2.nodeType == xml.dom.Node.ELEMENT_NODE:
  98                         post_props[str(n2.nodeName)] = n2.firstChild.nodeValue
  99             else:
 100                 post_props[str(n.nodeName)] = n.firstChild.nodeValue
 101
 102     if not 'text' in post_props:
 103         raise ValueError("No event node in ths post")
 104     if 'picture_keyword' in post_props:
 105         userpic=post_props['picture_keyword']
 106     else:
 107         userpic='_'
 108     for fmt in ('jpg','gif','png'):
 109         if os.access("%s/%s.%s" % (dirs['archive'],userpic,fmt),os.R_OK):
 110             post_props['userpic']='%s/userpics/%s.%s'%(urls['images'],userpic,fmt)
 111             break
 112     if commentfile:
 113         comment_xml = xml.dom.minidom.parse(  commentfile)
 114     # We suppose that comments are already sorted accoridng to post time
 115         comment_tree = []
 116         comment_hash = {}
 117         comment_count = 0
 118         for c in comment_xml.documentElement.childNodes:
 119             if c.nodeType != xml.dom.Node.ELEMENT_NODE or c.nodeName != 'comment':
 120                 continue
 121             comment={'date':'Unknown','children':[],'subject':'','userlink':'(Anonymous)'}
 122             for i in c.childNodes:
 123                 if i.nodeType != xml.dom.Node.ELEMENT_NODE:
 124                     continue
 125                 if i.nodeName == 'body':
 126                     if i.firstChild is None:
 127                         comment['body']='<b>Deleted comment</b>'
 128                     else:
 129                         comment['body']=process_text(i.firstChild.nodeValue)
 130                 else:
 131                     tx=i.firstChild
 132                     if tx:
 133                         comment[str(i.nodeName)]=tx.nodeValue
 134             comment_hash[comment['id']]=comment
 135             if 'parentid' in comment and comment['parentid'] in comment_hash:
 136                 comment_hash[comment['parentid']]['children'].append(comment)
 137                 comment_count +=1
 138             else:
 139                 comment_tree.append(comment)
 140
 141         post_props['comments'] = format_comments(comment_tree)
 142         post_props['comment_count'] = comment_count
 143     else:
 144         post_props['comments'] = ''
 145         post_props['comment_count'] = 0
 146     page = template['post']%post_props
 147
 148     with codecs.open(outputfile,"w","utf-8") as f :
 149         f.write(page)
 150     return (post_props['logtime'],post_props['ditemid'],post_props['subject'],post_props['taglist'])
 151
 152
 153
 154
 155 if __name__ == '__main__':
 156     config=ConfigParser()
 157     if config.read(["ljmkstatic.conf"]) < 1:
 158         raise ValueError("No config file found")
 159     read_templates(config)
 160     set_parameters(config)
 161     for post_file in sorted(glob.glob(dirs['dump']+"/L-*")):
 162         post_id = re.search("(\d+)$",post_file).group(1)
 163         comment_file = dirs['dump']+"/C-"+post_id
 164         outfile=dirs['dump']+"/"+post_id+".html"
 165         try:
 166             t1=os.stat(post_file).st_mtime
 167             try:
 168                 t2=os.stat(comment_file).st_mtime
 169             except OSError:
 170                 t2=0
 171                 comment_file = None
 172             t3=os.stat(outfile).st_mtime
 173             if t3 > t1 and t3 > t2:
 174                 continue
 175         except OSError:
 176             pass
 177         print "Processing post L-%s"%post_id
 178         (date,post_id,subject,tags) = do_post(post_file,comment_file,outfile)
 179         # Fix me - update index structures
 180