From a0580d0de808cd9aebda0e4b4a0538a3c83fba2f Mon Sep 17 00:00:00 2001
From: "Sean M. Graham"
Date: Sat, 24 Jan 2009 20:59:42 -0500
Subject: [PATCH] builds one big file now of all entries and comments for the user

---
Review notes (this section is ignored when the patch is applied):
 * Line breaks, indentation and blank lines were restored from a copy in
   which all whitespace had been collapsed; blank context lines are a
   best-effort reconstruction from the hunk line counts.
 * The added lines were amended during review, so the commit id on the
   first line and the post-image blob id on the "index" line describe the
   original content, not this one. Amendments:
    - getNodeText / addCommentsForId catch IndexError / IOError instead of
      using a bare except
    - addCommentsForId closes the comment file once it has been parsed
    - the "anonymous" fallback tests for an empty user name (the old
      try/except could never fire because getNodeText does not raise)
    - the per-entry "print entry" was dropped; it wrote to the same stdout
      stream as the XML document

 convertdump.py | 106 ++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 99 insertions(+), 7 deletions(-)

diff --git a/convertdump.py b/convertdump.py
index 6f394ce..f19be5f 100755
--- a/convertdump.py
+++ b/convertdump.py
@@ -1,11 +1,16 @@
 #!/usr/bin/python
 
 import xml.dom.minidom
+import os
+from time import strptime, strftime
 
 def getNodeText(doc, nodename):
     rc = ""
 
-    nodelist = doc.getElementsByTagName(nodename)[0].childNodes
+    try:
+        nodelist = doc.getElementsByTagName(nodename)[0].childNodes
+    except IndexError:
+        return ""
 
     for node in nodelist:
         if node.nodeType == node.TEXT_NODE:
@@ -15,12 +20,15 @@ def getNodeText(doc, nodename):
 
 def appendTextNode(doc, parent, nodename, value):
     element = doc.createElement(nodename)
-    textNode = doc.createTextNode(value)
-    element.appendChild(textNode)
+
+    if( value != "" ):
+        textNode = doc.createTextNode(value)
+        element.appendChild(textNode)
+
     parent.appendChild(element)
 
 
-def addEntryForID(doc, username, id):
+def addEntryForId(outDoc, username, id):
     entryFile = open("%s/L-%s" % (username,id), "r")
 
     inDoc = xml.dom.minidom.parse(entryFile)
@@ -54,8 +62,72 @@ def addEntryForID(doc, username, id):
 
     # XXXSMG: make sure there is a comment file before trying to do anything
     # with it
-    commentFile = open("%s/C-%s" % (username,id), "r")
-
+    addCommentsForId(outDoc, entry, username, id)
+
+def addCommentsForId(outDoc, entry, username, id):
+    """Append one "comment" element to entry for each comment found in the
+    file "USERNAME/C-ID"; do nothing if that file cannot be opened.
+    """
+    try:
+        commentFile = open("%s/C-%s" % (username,id), "r")
+    except IOError:
+        # there are no comments for this entry
+        return
+
+    # only the parsed DOM is used below, so close the file right away
+    try:
+        inDoc = xml.dom.minidom.parse(commentFile)
+    finally:
+        commentFile.close()
+
+    comments = inDoc.getElementsByTagName("comment")
+
+    for comment in comments:
+        outComment = outDoc.createElement("comment")
+        entry.appendChild(outComment)
+
+        # add the item id for the comment
+        appendTextNode(outDoc, outComment, "itemid",
+            getNodeText(comment, "id"))
+
+        # convert the time string
+        timeString = getNodeText(comment, "date")
+        if( timeString != "" ):
+            inDate = strptime(timeString, "%Y-%m-%dT%H:%M:%SZ")
+            outDate = strftime("%Y-%m-%d %H:%M:%S", inDate)
+            appendTextNode(outDoc, outComment, "eventtime", outDate)
+        else:
+            emptyTime = outDoc.createElement("eventtime")
+            outComment.appendChild(emptyTime)
+
+        # Create a subject element
+        appendTextNode(outDoc, outComment, "subject",
+            getNodeText(comment, "subject"))
+
+        # Create an event element
+        appendTextNode(outDoc, outComment, "event",
+            getNodeText(comment, "body"))
+
+        # Create the author element
+        author = outDoc.createElement("author")
+        outComment.appendChild(author)
+
+        # getNodeText returns "" instead of raising when the comment has no
+        # "user" node, so the fallback has to test for the empty string
+        cUser = getNodeText(comment, "user")
+        if( cUser == "" ):
+            cUser = "anonymous"
+
+        appendTextNode(outDoc, author, "name", cUser)
+        appendTextNode(outDoc, author, "email", cUser + "@livejournal.com")
+
+        # Create the parent_itemid
+        parentId = getNodeText(comment, "parentid")
+        if(parentId != ""):
+            appendTextNode(outDoc, outComment, "parent_itemid", parentId)
+
+
+
 
 # Create the minidom document
 outDoc = xml.dom.minidom.Document()
@@ -64,7 +136,27 @@ outDoc = xml.dom.minidom.Document()
 ljElement = outDoc.createElement("livejournal")
 outDoc.appendChild(ljElement)
 
-addEntryForID(outDoc, "grahams", "2583")
+userDir = os.listdir("grahams")
+
+highNum = -1
+entryArray = []
+
+# get the list of entries
+for file in userDir:
+    if file.startswith("L-"):
+        entryNum = int(file.replace("L-",""))
+
+        entryArray.append(entryNum)
+
+        if( highNum < entryNum ):
+            highNum = entryNum
+
+entryArray.sort()
+
+# start processing entries
+for entry in entryArray:
+    addEntryForId(outDoc, "grahams", entry)
+
 
 # Print our newly created XML
 print outDoc.toprettyxml(indent=" ")
-- 
2.39.2