+#!/usr/bin/python
+#
+# ljdump.py - livejournal archiver
+# Greg Hewgill <greg@hewgill.com> http://hewgill.com
+# Version 1.0.3
+#
+# $Id$
+#
+# This program reads the journal entries from a livejournal (or compatible)
+# blog site and archives them in a subdirectory named after the journal name.
+#
+# The configuration is read from "ljdump.config". A sample configuration is
+# provided in "ljdump.config.sample", which should be copied and then edited.
+# The configuration settings are:
+#
+# server - The XMLRPC server URL. This should only need to be changed
+# if you are dumping a journal that is livejournal-compatible
+# but is not livejournal itself.
+#
+# username - The livejournal user name. A subdirectory will be created
+# with this same name to store the journal entries.
+#
+# password - The account password. This password is never sent in the
+# clear; the livejournal "challenge" password mechanism is used.
+#
+# This program may be run as often as needed to bring the backup copy up
+# to date. Both new and updated items are downloaded.
+#
+# LICENSE
+#
+# This software is provided 'as-is', without any express or implied
+# warranty. In no event will the author be held liable for any damages
+# arising from the use of this software.
+#
+# Permission is granted to anyone to use this software for any purpose,
+# including commercial applications, and to alter it and redistribute it
+# freely, subject to the following restrictions:
+#
+# 1. The origin of this software must not be misrepresented; you must not
+# claim that you wrote the original software. If you use this software
+# in a product, an acknowledgment in the product documentation would be
+# appreciated but is not required.
+# 2. Altered source versions must be plainly marked as such, and must not be
+# misrepresented as being the original software.
+# 3. This notice may not be removed or altered from any source distribution.
+#
+# Copyright (c) 2005 Greg Hewgill
+
import codecs, md5, os, pprint, sys, xml.dom.minidom, xmlrpclib
from xml.sax import saxutils
f.write("<%s>%s</%s>\n" % (k, saxutils.escape(s), k))
f.write("</%s>\n" % name)
-def writedump(itemid, event):
- f = codecs.open("archive/"+itemid, "w", "UTF-8")
+def writedump(fn, event):  # write one journal event to the file path fn, starting with an XML declaration
+ f = codecs.open(fn, "w", "UTF-8")  # the caller now supplies the whole path (was "archive/" + itemid)
f.write("""<?xml version="1.0"?>\n""")
dumpelement(f, "event", event)
f.close()
config = xml.dom.minidom.parse("ljdump.config")
+Server = config.documentElement.getElementsByTagName("server")[0].childNodes[0].data  # text of the first <server> element; used below as the XMLRPC endpoint
Username = config.documentElement.getElementsByTagName("username")[0].childNodes[0].data
Password = config.documentElement.getElementsByTagName("password")[0].childNodes[0].data
-server = xmlrpclib.ServerProxy("http://livejournal.com/interface/xmlrpc")
+print "Fetching journal entries for: %s" % Username
+# Entries are archived in a subdirectory named after the journal user.
+try:
+    os.mkdir(Username)
+except OSError:
+    # The directory already existing is the normal case on every run after
+    # the first.  Any other failure (permissions, a plain file with that
+    # name, ...) is re-raised instead of being silently swallowed.
+    if not os.path.isdir(Username):
+        raise
+else:
+    print "Created subdirectory: %s" % Username
+
+server = xmlrpclib.ServerProxy(Server)
+
+# Counters reported in the summary at the end of the run.
+new = 0
+errors = 0
+
last = ""
+# Load the sync time saved in "<Username>/.last" by a previous run.  The
+# open() call is inside the try block because the file does not exist the
+# first time a journal is dumped; in that case "last" stays empty.
+try:
+    f = open("%s/.last" % Username, "r")
+    try:
+        # readline() returns "" for an empty file, so stripping the
+        # trailing newline this way cannot raise.
+        last = f.readline().rstrip("\n")
+    finally:
+        f.close()
+except IOError:
+    pass
+origlast = last
+
while True:
r = server.LJ.XMLRPC.syncitems(dochallenge({
'username': Username,
if len(r['syncitems']) == 0:
break
for item in r['syncitems']:
- #print item['item']
if item['item'][0] == 'L':
- if not os.access("archive/"+item['item'], os.F_OK):
- try:
- e = server.LJ.XMLRPC.getevents(dochallenge({
- 'username': Username,
- 'ver': 1,
- 'selecttype': "one",
- 'itemid': item['item'][2:],
- }, Password))
- writedump(item['item'], e['events'][0])
- except xmlrpclib.Fault, x:
- print "Error getting item: %s" % item['item']
- pprint.pprint(x)
+ print "Fetching journal entry %s (%s)" % (item['item'], item['action'])
+ try:
+ e = server.LJ.XMLRPC.getevents(dochallenge({
+ 'username': Username,
+ 'ver': 1,
+ 'selecttype': "one",
+ 'itemid': item['item'][2:],
+ }, Password))
+ writedump("%s/%s" % (Username, item['item']), e['events'][0])
+ new += 1
+ except xmlrpclib.Fault, x:
+ print "Error getting item: %s" % item['item']
+ pprint.pprint(x)
+ errors += 1
last = item['time']
+# Remember the time of the last sync item processed for the next run.
+f = open("%s/.last" % Username, "w")
+f.write("%s\n" % last)
+f.close()
+
+# Summary: mention the previous sync point only when a prior run left one.
+if not origlast:
+    print "%d new entries" % new
+else:
+    print "%d new entries (since %s)" % (new, origlast)
+if errors:
+    print "%d errors" % errors