www.wagner.pp.ru Git - oss/ljdump.git/commitdiff
add community download support
authorGreg Hewgill <greg@hewgill.com>
Tue, 13 Jan 2009 10:11:20 +0000 (23:11 +1300)
committerGreg Hewgill <greg@hewgill.com>
Tue, 13 Jan 2009 10:11:20 +0000 (23:11 +1300)
README.txt
ljdump.py

index d00a7d0bb25d121b63c4639af98627599cedba42..5858b9b3d2204500c0f05bb0a81a01a0bdc30ae8 100644 (file)
@@ -9,6 +9,10 @@ directly, or you may need to open a Terminal/Command Prompt window to run it.
 Either way, it will prompt you for your Livejournal username and password,
 then download all your journal entries, comments, and userpics.
 
+You may optionally download entries from a different journal (a community)
+where you are a member. If you are a community maintainer, you can also
+download comments from the community.
+
 If you want to save your username and password so you don't have to type
 it every time you run ljdump, you can save it in the configuration file.
 
@@ -26,6 +30,12 @@ The configuration settings are:
   password - The account password. This password is never sent in the
              clear; the livejournal "challenge" password mechanism is used.
 
+  journal - Optional: The journal to download entries from. If this is
+            not specified, the "username" journal is downloaded. If this
+            is specified, then only the named journals will be downloaded
+            (this element may be specified more than once to download
+            multiple journals).
+
 This program may be run as often as needed to bring the backup copy up
 to date. Both new and updated items are downloaded.
 
index d0f68759fb7f31652a0c8ea3817b66f6845e795a..b48b404fdc40bf25b31cfb3ff26fe527382d9fa5 100755 (executable)
--- a/ljdump.py
+++ b/ljdump.py
@@ -88,8 +88,8 @@ def writedump(fn, event):
     dumpelement(f, "event", event)
     f.close()
 
-def writelast(username, lastsync, lastmaxid):
-    f = open("%s/.last" % username, "w")
+def writelast(journal, lastsync, lastmaxid):
+    f = open("%s/.last" % journal, "w")
     f.write("%s\n" % lastsync)
     f.write("%s\n" % lastmaxid)
     f.close()
@@ -107,15 +107,19 @@ def gettext(e):
         return ""
     return e[0].firstChild.nodeValue
 
-def ljdump(Server, Username, Password):
+def ljdump(Server, Username, Password, Journal):
     m = re.search("(.*)/interface/xmlrpc", Server)
     if m:
         Server = m.group(1)
+    if Username != Journal:
+        authas = "&authas=%s" % Journal
+    else:
+        authas = ""
 
-    print "Fetching journal entries for: %s" % Username
+    print "Fetching journal entries for: %s" % Journal
     try:
-        os.mkdir(Username)
-        print "Created subdirectory: %s" % Username
+        os.mkdir(Journal)
+        print "Created subdirectory: %s" % Journal
     except:
         pass
 
@@ -130,7 +134,7 @@ def ljdump(Server, Username, Password):
     lastsync = ""
     lastmaxid = 0
     try:
-        f = open("%s/.last" % Username, "r")
+        f = open("%s/.last" % Journal, "r")
         lastsync = f.readline()
         if lastsync[-1] == '\n':
             lastsync = lastsync[:len(lastsync)-1]
@@ -160,6 +164,7 @@ def ljdump(Server, Username, Password):
             'username': Username,
             'ver': 1,
             'lastsync': lastsync,
+            'usejournal': Journal,
         }, Password))
         #pprint.pprint(r)
         if len(r['syncitems']) == 0:
@@ -173,9 +178,10 @@ def ljdump(Server, Username, Password):
                         'ver': 1,
                         'selecttype': "one",
                         'itemid': item['item'][2:],
+                        'usejournal': Journal,
                     }, Password))
                     if e['events']:
-                        writedump("%s/%s" % (Username, item['item']), e['events'][0])
+                        writedump("%s/%s" % (Journal, item['item']), e['events'][0])
                         newentries += 1
                     else:
                         print "Unexpected empty item: %s" % item['item']
@@ -185,7 +191,7 @@ def ljdump(Server, Username, Password):
                     pprint.pprint(x)
                     errors += 1
             lastsync = item['time']
-            writelast(Username, lastsync, lastmaxid)
+            writelast(Journal, lastsync, lastmaxid)
 
     # The following code doesn't work because the server rejects our repeated calls.
     # http://www.livejournal.com/doc/server/ljp.csp.xml-rpc.getevents.html
@@ -205,21 +211,21 @@ def ljdump(Server, Username, Password):
     #    if len(r['events']) == 0:
     #        break
     #    for item in r['events']:
-    #        writedump("%s/L-%d" % (Username, item['itemid']), item)
+    #        writedump("%s/L-%d" % (Journal, item['itemid']), item)
     #        newentries += 1
     #        lastsync = item['eventtime']
 
-    print "Fetching journal comments for: %s" % Username
+    print "Fetching journal comments for: %s" % Journal
 
     try:
-        f = open("%s/comment.meta" % Username)
+        f = open("%s/comment.meta" % Journal)
         metacache = pickle.load(f)
         f.close()
     except:
         metacache = {}
 
     try:
-        f = open("%s/user.map" % Username)
+        f = open("%s/user.map" % Journal)
         usermap = pickle.load(f)
         f.close()
     except:
@@ -227,9 +233,15 @@ def ljdump(Server, Username, Password):
 
     maxid = lastmaxid
     while True:
-        r = urllib2.urlopen(urllib2.Request(Server+"/export_comments.bml?get=comment_meta&startid=%d" % (maxid+1), headers = {'Cookie': "ljsession="+ljsession}))
-        meta = xml.dom.minidom.parse(r)
-        r.close()
+        try:
+            try:
+                r = urllib2.urlopen(urllib2.Request(Server+"/export_comments.bml?get=comment_meta&startid=%d%s" % (maxid+1, authas), headers = {'Cookie': "ljsession="+ljsession}))
+                meta = xml.dom.minidom.parse(r)
+            except:
+                print "*** Error fetching comment meta, possibly not community maintainer?"
+                break
+        finally:
+            r.close()
         for c in meta.getElementsByTagName("comment"):
             id = int(c.getAttribute("id"))
             metacache[id] = {
@@ -243,42 +255,26 @@ def ljdump(Server, Username, Password):
         if maxid >= int(meta.getElementsByTagName("maxid")[0].firstChild.nodeValue):
             break
 
-    f = open("%s/comment.meta" % Username, "w")
+    f = open("%s/comment.meta" % Journal, "w")
     pickle.dump(metacache, f)
     f.close()
 
-    f = open("%s/user.map" % Username, "w")
+    f = open("%s/user.map" % Journal, "w")
     pickle.dump(usermap, f)
     f.close()
 
-    print "Fetching userpics for: %s" % Username
-    f = open("%s/userpics.xml" % Username, "w")
-    print >>f, """<?xml version="1.0"?>"""
-    print >>f, "<userpics>"
-    for p in userpics:
-        print >>f, """<userpic keyword="%s" url="%s" />""" % (p, userpics[p])
-        pic = urllib2.urlopen(userpics[p])
-        ext = MimeExtensions.get(pic.info()["Content-Type"], "")
-        picfn = re.sub(r'[*?\\/:<>"|]', "_", p)
-        try:
-            picfn = codecs.utf_8_decode(picfn)[0]
-            picf = open("%s/%s%s" % (Username, picfn, ext), "wb")
-        except:
-            # for installations where the above utf_8_decode doesn't work
-            picfn = "".join([ord(x) < 128 and x or "_" for x in picfn])
-            picf = open("%s/%s%s" % (Username, picfn, ext), "wb")
-        shutil.copyfileobj(pic, picf)
-        pic.close()
-        picf.close()
-    print >>f, "</userpics>"
-    f.close()
-
     newmaxid = maxid
     maxid = lastmaxid
     while True:
-        r = urllib2.urlopen(urllib2.Request(Server+"/export_comments.bml?get=comment_body&startid=%d" % (maxid+1), headers = {'Cookie': "ljsession="+ljsession}))
-        meta = xml.dom.minidom.parse(r)
-        r.close()
+        try:
+            try:
+                r = urllib2.urlopen(urllib2.Request(Server+"/export_comments.bml?get=comment_body&startid=%d%s" % (maxid+1, authas), headers = {'Cookie': "ljsession="+ljsession}))
+                meta = xml.dom.minidom.parse(r)
+            except:
+                print "*** Error fetching comment body, possibly not community maintainer?"
+                break
+        finally:
+            r.close()
         for c in meta.getElementsByTagName("comment"):
             id = int(c.getAttribute("id"))
             jitemid = c.getAttribute("jitemid")
@@ -293,7 +289,7 @@ def ljdump(Server, Username, Password):
             if usermap.has_key(c.getAttribute("posterid")):
                 comment["user"] = usermap[c.getAttribute("posterid")]
             try:
-                entry = xml.dom.minidom.parse("%s/C-%s" % (Username, jitemid))
+                entry = xml.dom.minidom.parse("%s/C-%s" % (Journal, jitemid))
             except:
                 entry = xml.dom.minidom.getDOMImplementation().createDocument(None, "comments", None)
             found = False
@@ -305,7 +301,7 @@ def ljdump(Server, Username, Password):
                 print "Warning: downloaded duplicate comment id %d in jitemid %s" % (id, jitemid)
             else:
                 entry.documentElement.appendChild(createxml(entry, "comment", comment))
-                f = codecs.open("%s/C-%s" % (Username, jitemid), "w", "UTF-8")
+                f = codecs.open("%s/C-%s" % (Journal, jitemid), "w", "UTF-8")
                 entry.writexml(f)
                 f.close()
                 newcomments += 1
@@ -316,7 +312,30 @@ def ljdump(Server, Username, Password):
 
     lastmaxid = maxid
 
-    writelast(Username, lastsync, lastmaxid)
+    writelast(Journal, lastsync, lastmaxid)
+
+    if Username == Journal:
+        print "Fetching userpics for: %s" % Username
+        f = open("%s/userpics.xml" % Username, "w")
+        print >>f, """<?xml version="1.0"?>"""
+        print >>f, "<userpics>"
+        for p in userpics:
+            print >>f, """<userpic keyword="%s" url="%s" />""" % (p, userpics[p])
+            pic = urllib2.urlopen(userpics[p])
+            ext = MimeExtensions.get(pic.info()["Content-Type"], "")
+            picfn = re.sub(r'[*?\\/:<>"|]', "_", p)
+            try:
+                picfn = codecs.utf_8_decode(picfn)[0]
+                picf = open("%s/%s%s" % (Username, picfn, ext), "wb")
+            except:
+                # for installations where the above utf_8_decode doesn't work
+                picfn = "".join([ord(x) < 128 and x or "_" for x in picfn])
+                picf = open("%s/%s%s" % (Username, picfn, ext), "wb")
+            shutil.copyfileobj(pic, picf)
+            pic.close()
+            picf.close()
+        print >>f, "</userpics>"
+        f.close()
 
     if origlastsync:
         print "%d new entries, %d new comments (since %s)" % (newentries, newcomments, origlastsync)
@@ -331,6 +350,12 @@ if __name__ == "__main__":
         server = config.documentElement.getElementsByTagName("server")[0].childNodes[0].data
         username = config.documentElement.getElementsByTagName("username")[0].childNodes[0].data
         password = config.documentElement.getElementsByTagName("password")[0].childNodes[0].data
+        journals = config.documentElement.getElementsByTagName("journal")
+        if journals:
+            for e in journals:
+                ljdump(server, username, password, e.childNodes[0].data)
+        else:
+            ljdump(server, username, password, username)
     else:
         from getpass import getpass
         print "ljdump - livejournal archiver"
@@ -341,4 +366,13 @@ if __name__ == "__main__":
         username = raw_input("Username: ")
         password = getpass("Password: ")
         print
-    ljdump(server, username, password)
+        print "You may back up either your own journal, or a community."
+        print "If you are a community maintainer, you can back up both entries and comments."
+        print "If you are not a maintainer, you can back up only entries."
+        print
+        journal = raw_input("Journal to back up (or hit return to back up '%s'): " % username)
+        print
+        if journal:
+            ljdump(server, username, password, journal)
+        else:
+            ljdump(server, username, password, username)