3 # Copyright 2009, Sean M. Graham (www.sean-graham.com)
6 # Redistribution and use in source and binary forms, with or without
7 # modification, are permitted provided that the following conditions are
10 # - Redistributions of source code must retain the above copyright notice,
11 # this list of conditions and the following disclaimer.
13 # - Redistributions in binary form must reproduce the above copyright notice,
14 # this list of conditions and the following disclaimer in the documentation
15 # and/or other materials provided with the distribution.
17 # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
18 # WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
19 # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
20 # EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21 # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
23 # OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24 # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
26 # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 import xml.dom.minidom
33 from time import strptime, strftime
def getNodeText(doc, nodename):
    """Return the text held by the first ``<nodename>`` element under *doc*.

    Args:
        doc: A minidom ``Document`` or ``Element`` to search (anything that
            provides ``getElementsByTagName``).
        nodename: Tag name to look up.

    Returns:
        The concatenated data of the element's direct text-node children,
        or ``""`` when no such element exists or it holds no text.
    """
    matches = doc.getElementsByTagName(nodename)
    if not matches:
        # Callers compare the result against "" to detect an absent field,
        # so a missing element must not surface as an IndexError.
        return ""

    # Only direct TEXT_NODE children count; nested elements are skipped.
    return "".join(
        node.data
        for node in matches[0].childNodes
        if node.nodeType == node.TEXT_NODE
    )
def appendTextNode(doc, parent, nodename, value):
    """Append a ``<nodename>`` element holding *value* to *parent*.

    Args:
        doc: minidom ``Document`` used as the node factory.
        parent: Node that receives the new element as its last child.
        nodename: Tag name of the element to create.
        value: Text for the element.  Byte strings are decoded as UTF-8,
            falling back to cp1252 when they are not valid UTF-8.

    Returns:
        None.  An empty *value* still produces the element, just without a
        text child.

    Raises:
        UnicodeEncodeError: If a text *value* cannot be encoded as UTF-8.
    """
    nodeValue = value

    # make sure value is properly encoded
    if isinstance(nodeValue, bytes):
        try:
            nodeValue = nodeValue.decode("UTF-8")
        except UnicodeDecodeError:
            # cp1252 leaves a few byte values undefined; substitute those
            # rather than failing the whole conversion.
            nodeValue = nodeValue.decode("cp1252", "replace")
    else:
        # Fail here, next to the offending value, instead of much later
        # when the finished document is serialised.
        nodeValue.encode("UTF-8")

    element = doc.createElement(nodename)

    if nodeValue != "":
        element.appendChild(doc.createTextNode(nodeValue))

    parent.appendChild(element)
# Read the dumped entry file "<username>/L-<id>", build an <entry> element
# from it in outDoc, attach the entry's comments, and append the result to
# `element`.
#
#   outDoc        - minidom document used to create the new nodes
#   element       - node that receives the finished <entry>
#   username      - directory prefix of the dump files
#   id            - entry number, substituted into the "L-%s" file name
#   includeSecure - tested against False before an entry is reported as
#                   omitted
#
# NOTE(review): this listing carries no indentation and its embedded
# numbering skips values (89 -> 92, 94 -> 97, 101 -> 104, 104 -> 106,
# 106 -> 108), so the guards and else-branches around the security handling
# are not visible here.  Confirm against the complete file before editing.
# NOTE(review): no close() for entryFile appears in the lines shown.
68 def addEntryForId(outDoc, element, username, id, includeSecure):
69 entryFile = open("%s/L-%s" % (username,id), "r")
70 inDoc = xml.dom.minidom.parse(entryFile)
72 # Create an entry element
73 entry = outDoc.createElement("entry")
75 # Create an itemid element
76 appendTextNode(outDoc, entry, "itemid", getNodeText(inDoc,"itemid"))
78 # Create an eventtime element
79 appendTextNode(outDoc, entry, "eventtime", getNodeText(inDoc, "eventtime"))
81 # Create a subject element
82 appendTextNode(outDoc, entry, "subject", getNodeText(inDoc, "subject"))
84 # Create an event node (special case because for some reason there are two
85 # 'event' elements in the pydump output, which is probably LJ's fault)
# The first <event> is fetched explicitly, then getNodeText looks up the
# <event> nested beneath it.
86 event = inDoc.getElementsByTagName("event")[0]
87 appendTextNode(outDoc, entry, "event", getNodeText(event, "event"))
89 security = getNodeText(inDoc, "security")
92 # don't append this entry unless the user provided the argument
93 if(includeSecure == False):
94 print("omitting secure entry: L-%s" % id)
# NOTE(review): presumably the function returns here and the rest runs in an
# else-branch; those lines are absent from this listing.
97 if(security == "usemask"):
98 print("including allowmask entry: L-%s" % id)
100 # Create an allowmask element
101 maskText = getNodeText(inDoc, "allowmask")
# Two alternative allowmask values follow: the text read above, or "0".
# NOTE(review): the condition choosing between them is not shown; presumably
# "0" is used when maskText is empty.
104 appendTextNode(outDoc, entry, "allowmask", maskText)
106 appendTextNode(outDoc, entry, "allowmask", "0")
108 print("including private entry: L-%s" % id)
110 appendTextNode(outDoc, entry, "security", security)
112 # Create a taglist element
113 appendTextNode(outDoc, entry, "taglist", getNodeText(inDoc, "taglist"))
115 # XXXSMG: make sure there is a comment file before trying to do anything
117 addCommentsForId(outDoc, entry, username, id)
# The entry is attached to the output tree only after all of its children
# (including comments) have been built.
119 element.appendChild(entry)
# Read the comment file "<username>/C-<id>" and append one <comment> element
# per <comment> found in it to `entry`.
#
#   outDoc   - minidom document used to create the new nodes
#   entry    - element that receives the <comment> children
#   username - directory prefix of the dump files
#   id       - entry number, substituted into the "C-%s" file name
#
# Each output <comment> gets itemid, eventtime, subject, event and an
# <author> holding name and email; parent_itemid is written from the
# comment's "parentid" text.
#
# NOTE(review): this listing carries no indentation and its embedded
# numbering skips values (121 -> 123, 124 -> 127, 144 -> 146, 162 -> 166,
# 170 -> 172), so the `try:` line, the body of the IOError handler and some
# conditionals are not visible here.  Confirm against the complete file.
# NOTE(review): no close() for commentFile appears in the lines shown.
121 def addCommentsForId(outDoc, entry, username, id):
123 commentFile = open("%s/C-%s" % (username,id), "r")
124 except IOError: # there are no comments for this entry
127 inDoc = xml.dom.minidom.parse(commentFile)
129 comments = inDoc.getElementsByTagName("comment")
131 for comment in comments:
132 outComment = outDoc.createElement("comment")
133 entry.appendChild(outComment)
135 # add the item id for the comment
136 appendTextNode(outDoc, outComment, "itemid",
137 getNodeText(comment, "id"))
139 # convert the time string
# Input looks like "2009-01-31T12:34:56Z"; output is "2009-01-31 12:34:56".
140 timeString = getNodeText(comment, "date")
141 if( timeString != "" ):
142 inDate = strptime(timeString, "%Y-%m-%dT%H:%M:%SZ")
143 outDate = strftime("%Y-%m-%d %H:%M:%S", inDate)
144 appendTextNode(outDoc, outComment, "eventtime", outDate)
# NOTE(review): presumably the else-branch for an empty date; it emits an
# <eventtime> element with no text.
146 emptyTime = outDoc.createElement("eventtime")
147 outComment.appendChild(emptyTime)
149 # Create a subject element
150 appendTextNode(outDoc, outComment, "subject",
151 getNodeText(comment, "subject"))
153 # Create an event element
# The comment's "body" text becomes the output "event".
154 appendTextNode(outDoc, outComment, "event",
155 getNodeText(comment, "body"))
157 # Create the author element
158 author = outDoc.createElement("author")
159 outComment.appendChild(author)
162 cUser = getNodeText(comment, "user")
# NOTE(review): the embedded numbering skips 163-165 here; any handling of an
# empty user name is not visible in this listing.
# The email address is synthesised from the user name.
166 appendTextNode(outDoc, author, "name", cUser)
167 appendTextNode(outDoc, author, "email", cUser + "@livejournal.com")
169 # Create the parent_itemid
170 parentId = getNodeText(comment, "parentid")
# NOTE(review): the embedded numbering skips 171; a guard on parentId may
# precede this call.
172 appendTextNode(outDoc, outComment, "parent_itemid", parentId)
# Command-line driver.  Collects the numbers of the "L-<n>" files listed in
# the <username> directory, passes each to addEntryForId, and after every
# `entryLimit` entries (or at the last entry) opens
# "<username> - <entry>.xml", renders outDoc with toxml("UTF-8") and starts a
# fresh <livejournal> document.
#
# NOTE(review): the enclosing `def` line is not part of this listing, nor are
# the statements that initialise username, entryArray, highNum and
# currentFileEntry, the loop header that binds `file`, or the write of
# tempXML.  The embedded numbering skips values throughout
# (177 -> 180 -> 184, 194 -> 199 -> 201, 229 -> 234).  Confirm against the
# complete file before editing.
177 includeSecure = False;
# NOTE(review): the usage text names two arguments, but argv[2] is also read
# below.
180 print( "Usage: convertdump.py <username> <entrylimit>" )
184 entryLimit = int(argv[1])
# NOTE(review): bool() of any non-empty string is True, so if argv[2] is a
# command-line string then "0" or "False" would also enable secure entries.
187 includeSecure = bool(argv[2])
189 includeSecure = False
191 if(includeSecure == True):
192 print( "Warning: Including secure entries in XML output" )
194 userDir = os.listdir(username)
199 # get the list of entries
201 if file.startswith("L-"):
202 entryNum = int(file.replace("L-",""))
204 entryArray.append(entryNum)
206 if( highNum < entryNum ):
211 # Create the minidom document
212 outDoc = xml.dom.minidom.Document()
214 # Create the <livejournal> base element
215 ljElement = outDoc.createElement("livejournal")
216 outDoc.appendChild(ljElement)
220 # start processing entries
221 for entry in entryArray:
222 addEntryForId(outDoc, ljElement, username, entry, includeSecure)
224 currentFileEntry += 1
# Flush when the per-file limit is reached or this is the last entry value.
226 if( currentFileEntry == entryLimit or entry == entryArray[-1] ):
# The output file is named after the last entry number it contains.
# NOTE(review): no close() for f appears in the lines shown.
228 f = open("%s - %s.xml" % (username, entry), "w")
229 tempXML = outDoc.toxml("UTF-8")
234 # Create the minidom document
235 outDoc = xml.dom.minidom.Document()
237 # Create the <livejournal> base element
238 ljElement = outDoc.createElement("livejournal")
239 outDoc.appendChild(ljElement)
# Script entry guard.
# NOTE(review): its body is not part of this listing.
241 if __name__ == "__main__":