X-Git-Url: http://www.wagner.pp.ru/gitweb/?a=blobdiff_plain;f=ljmkstatic;fp=ljmkstatic;h=a27cf3d1d4b310934e890bcd6be17bd8c2377713;hb=defcff9110fc3331a435a3b47e25eaf15d5166a0;hp=0000000000000000000000000000000000000000;hpb=283cbafcc92879ab5eaa29cbc9ffc854d340af79;p=oss%2Fljdump.git
diff --git a/ljmkstatic b/ljmkstatic
new file mode 100644
index 0000000..a27cf3d
--- /dev/null
+++ b/ljmkstatic
@@ -0,0 +1,180 @@
+#!/usr/bin/python
+# -*- encoding: utf-8 -*-
+"""
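+Formats the post (L-nnnn) and comment (C-nnnn) files downloaded by
+ljdump.py into HTML pages using templates.
+
+Syntax ljformatxml -t template -o file.html L-nnnn C-nnnn
+
+NOTE: the command-line syntax above is not what the script currently
+does. When run, it reads ljmkstatic.conf and processes every L-* file
+found in the configured dump directory.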
+
+"""
+from ConfigParser import ConfigParser
+import xml.dom.minidom, xml.dom
+import re
+import sys,os,glob
+import codecs
+# Configuration parameters
+# Directory for symlinks
+# URL of the picture with the little man (user info icon)
+# Directory with the dump results
+# Directory for pictures
+# Template for the post page
+# Template for the comment block
+
+# Template texts keyed by 'post' and 'comment'; filled by read_templates().
+template={}
+# Options of the [urls] config section; filled by set_parameters().
+urls={}
+# Options of the [directories] config section; filled by set_parameters().
+dirs={}
+def read_templates(config):
+ global template
+ for i in ['post','comment']:
+ with codecs.open(config.get('templates',i),'r','utf-8') as f:
+ template[i]=f.read()
+
+
+def set_parameters(config):
+ global urls,dirs
+ for i in config.options('urls'):
+ urls[i]=config.get('urls',i)
+ for i in config.options('directories'):
+ dirs[i]=config.get('directories',i)
+
+def process_ljtag(m):
+ """
+ Build the replacement text for one lj tag; meant to be passed to re.sub.
+
+ m is the match object for the tag; the whole match (group 0) is inspected.
+ Returns "" for any tag containing "lj-cut" and a formatted string for a
+ tag carrying a user= attribute. For any other tag a message is printed
+ and the function falls off the end, i.e. returns None.
+ """
+ tag = m.group(0)
+ # Any tag mentioning lj-cut is dropped from the output.
+ if tag.find("lj-cut")!=-1:
+ return ""
+ if tag.find('user=')!=-1:
+ # The user name may be quoted with either quote character or unquoted.
+ name= re.search('user=[\'\"]?(\w+)[\'\"]?',tag).group(1)
+ # An optional title= attribute supplies the displayed text; the user
+ # name is used when it is absent.
+ title = re.search('title=[\"\']?([^"\'>]+)[\'\"]?',tag)
+ if title:
+ title = title.group(1)
+ else:
+ title=name
+ # NOTE(review): as written, the format string has a single %s but is
+ # given three values (name, user-info icon URL, title), which raises
+ # TypeError. The markup that carried the other placeholders appears to
+ # be missing from this copy of the file - restore it before use.
+ tag= '%s'%(name,urls['icons']+"/userinfo.gif",title)
+ return tag
+ print "unknown lj tag: ",tag
+
+def process_text(text):
+ """
+ Rewrite lj tags in text via process_ljtag, rewrite line breaks, and
+ return the result. If the lj tag substitution fails, the offending
+ text is printed and the exception is re-raised.
+ """
+ # Extract the text from here, parse it as html, replace lj tags
+ try:
+ # NOTE(review): this pattern starts with a bare "?", which the re
+ # module rejects; the characters that preceded it (presumably the
+ # opening of the tag) appear to be missing from this copy - confirm.
+ text = re.sub("?lj[^>]+>",process_ljtag,text)
+ except Exception as e:
+ print 'bad text :',text
+ raise e
+ # NOTE(review): the replacement literal below is split across two
+ # physical lines, which is not valid inside a "..." string; it looks
+ # like markup was lost from it - confirm against the original file.
+ text = re.sub("\r?\n","
",text)
+ # and replace img with local copies if they exist. If not, write to
+ # the picture cache
+ # FIXME post_props[post_text] = re.sub("]+>",process_img.post_text)
+ return text
+def format_comments(cmt_list):
+ """
+ Render a list of comment dictionaries with the 'comment' template and
+ return the concatenated result.
+
+ Each comment's replies (its 'children' list) are rendered first by a
+ recursive call and stored under 'comments', so the template can embed
+ them. The comment dictionaries are modified in place.
+ """
+ out=[]
+ for cmt in cmt_list:
+ # Render the replies first; a comment without replies gets ''.
+ if len(cmt['children']):
+ cmt['comments']=format_comments(cmt['children'])
+ else:
+ cmt['comments']=''
+ if 'user' in cmt:
+ # NOTE(review): the format string here is empty, so applying % to a
+ # user name raises TypeError. It presumably held lj user markup to
+ # be expanded by process_text - confirm against the original file.
+ cmt['userlink']=process_text(''%cmt['user'])
+ out.append(template['comment'] % cmt)
+ return ''.join(out)
+
+def do_post(postfile,commentfile,outputfile):
+ """
+ Handles one post. Returns post date, url, subject and tag list
+ """
+# ÐÑоÑиÑаÑÑ L-nnnn
+ post_xml = xml.dom.minidom.parse(postfile)
+ post_props = {'subject':'','taglist':''}
+ for n in post_xml.documentElement.childNodes:
+ if n.nodeType == xml.dom.Node.ELEMENT_NODE:
+ if n.nodeName == u'event':
+ post_props['text']=process_text(n.firstChild.nodeValue)
+ elif n.nodeName == u'props':
+ # ÐÑделиÑÑ Ð½ÐµÐ¾Ð±Ñ
одимÑÑ Ð¼ÐµÑаинÑоÑмаÑиÑ
+ for n2 in n.childNodes:
+ if n2.nodeType == xml.dom.Node.ELEMENT_NODE:
+ post_props[str(n2.nodeName)] = n2.firstChild.nodeValue
+ else:
+ post_props[str(n.nodeName)] = n.firstChild.nodeValue
+
+ if not 'text' in post_props:
+ raise ValueError("No event node in ths post")
+ if 'picture_keyword' in post_props:
+ userpic=post_props['picture_keyword']
+ else:
+ userpic='_'
+ for fmt in ('jpg','gif','png'):
+ if os.access("%s/%s.%s" % (dirs['archive'],userpic,fmt),os.R_OK):
+ post_props['userpic']='%s/userpics/%s.%s'%(urls['images'],userpic,fmt)
+ break
+ if commentfile:
+ comment_xml = xml.dom.minidom.parse( commentfile)
+ # We suppose that comments are already sorted accoridng to post time
+ comment_tree = []
+ comment_hash = {}
+ comment_count = 0
+ for c in comment_xml.documentElement.childNodes:
+ if c.nodeType != xml.dom.Node.ELEMENT_NODE or c.nodeName != 'comment':
+ continue
+ comment={'date':'Unknown','children':[],'subject':'','userlink':'(Anonymous)'}
+ for i in c.childNodes:
+ if i.nodeType != xml.dom.Node.ELEMENT_NODE:
+ continue
+ if i.nodeName == 'body':
+ if i.firstChild is None:
+ comment['body']='Deleted comment'
+ else:
+ comment['body']=process_text(i.firstChild.nodeValue)
+ else:
+ tx=i.firstChild
+ if tx:
+ comment[str(i.nodeName)]=tx.nodeValue
+ comment_hash[comment['id']]=comment
+ if 'parentid' in comment and comment['parentid'] in comment_hash:
+ comment_hash[comment['parentid']]['children'].append(comment)
+ comment_count +=1
+ else:
+ comment_tree.append(comment)
+
+ post_props['comments'] = format_comments(comment_tree)
+ post_props['comment_count'] = comment_count
+ else:
+ post_props['comments'] = ''
+ post_props['comment_count'] = 0
+ page = template['post']%post_props
+
+ with codecs.open(outputfile,"w","utf-8") as f :
+ f.write(page)
+ return (post_props['logtime'],post_props['ditemid'],post_props['subject'],post_props['taglist'])
+
+
+
+
+if __name__ == '__main__':
+ config=ConfigParser()
+ if config.read(["ljmkstatic.conf"]) < 1:
+ raise ValueError("No config file found")
+ read_templates(config)
+ set_parameters(config)
+ for post_file in sorted(glob.glob(dirs['dump']+"/L-*")):
+ post_id = re.search("(\d+)$",post_file).group(1)
+ comment_file = dirs['dump']+"/C-"+post_id
+ outfile=dirs['dump']+"/"+post_id+".html"
+ try:
+ t1=os.stat(post_file).st_mtime
+ try:
+ t2=os.stat(comment_file).st_mtime
+ except OSError:
+ t2=0
+ comment_file = None
+ t3=os.stat(outfile).st_mtime
+ if t3 > t1 and t3 > t2:
+ continue
+ except OSError:
+ pass
+ print "Processing post L-%s"%post_id
+ (date,post_id,subject,tags) = do_post(post_file,comment_file,outfile)
+ # Fix me - update index structures
+