3 # Copyright 2009, Sean M. Graham (www.sean-graham.com)
6 # Redistribution and use in source and binary forms, with or without
7 # modification, are permitted provided that the following conditions are
10 # - Redistributions of source code must retain the above copyright notice,
11 # this list of conditions and the following disclaimer.
13 # - Redistributions in binary form must reproduce the above copyright notice,
14 # this list of conditions and the following disclaimer in the documentation
15 # and/or other materials provided with the distribution.
17 # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
18 # WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
19 # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
20 # EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21 # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
23 # OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24 # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
26 # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 import xml.dom.minidom
34 from time import strptime, strftime
def getNodeText(doc, nodename):
    """Return the text held directly by the first ``nodename`` element.

    Args:
        doc: A minidom document or element to search (anything providing
            ``getElementsByTagName``).
        nodename: Tag name to look up.

    Returns:
        The concatenated data of the TEXT_NODE children of the first
        matching element.  Text inside nested child elements is not
        included.  Returns "" when no element with that tag exists or when
        it has no text children, so callers can compare against "".
    """
    matches = doc.getElementsByTagName(nodename)
    if not matches:
        # Optional elements (e.g. "security", "allowmask") are simply absent
        # from some dumps; report that as empty text rather than raising.
        return ""

    return "".join(
        node.data
        for node in matches[0].childNodes
        if node.nodeType == node.TEXT_NODE
    )
def appendTextNode(doc, parent, nodename, value):
    """Append a ``<nodename>`` element containing ``value`` to ``parent``.

    Args:
        doc: The minidom document used to create the new nodes.
        parent: The node that receives the new element as its last child.
        nodename: Tag name of the element to create.
        value: Text content for the element.  When it is "" the element is
            still appended, but left without a text child.

    Returns:
        None.  ``parent`` is modified in place.
    """
    nodeValue = value

    # make sure value is properly encoded
    try:
        nodeValue.encode("UTF-8")
    except UnicodeError:
        # The value cannot be expressed as UTF-8 as-is: take its cp1252
        # bytes and reinterpret them as UTF-8 text instead.
        nodeValue = nodeValue.encode("cp1252").decode("UTF-8")

    element = doc.createElement(nodename)

    if nodeValue != "":
        element.appendChild(doc.createTextNode(nodeValue))

    parent.appendChild(element)
def addEntryForId(outDoc, element, username, id, includeSecure):
    """Convert one dumped journal entry and append it to ``element``.

    Reads the entry file ``<username>/L-<id>``, builds an ``<entry>`` element
    in ``outDoc`` from it (itemid, eventtime, subject, event, optional
    allowmask/security, taglist, then any comments) and appends that element
    to ``element``.

    Args:
        outDoc: The minidom document being built.
        element: Parent node that receives the finished ``<entry>``.
        username: Directory holding the dump files.
        id: Numeric id of the entry (the part after ``L-`` in the file name).
        includeSecure: When false, entries that carry a non-empty
            ``security`` value are reported on stdout and left out.

    Raises:
        IOError: If the entry file cannot be opened.
    """
    # Close the dump file as soon as it has been parsed.
    with open("%s/L-%s" % (username, id), "r") as entryFile:
        inDoc = xml.dom.minidom.parse(entryFile)

    # Create an entry element
    entry = outDoc.createElement("entry")

    # Create the itemid, eventtime and subject elements; each is copied
    # verbatim from the element of the same name in the dump.
    for tag in ("itemid", "eventtime", "subject"):
        appendTextNode(outDoc, entry, tag, getNodeText(inDoc, tag))

    # Create an event node (special case because for some reason there are two
    # 'event' elements in the pydump output, which is probably LJ's fault)
    event = inDoc.getElementsByTagName("event")[0]
    appendTextNode(outDoc, entry, "event", getNodeText(event, "event"))

    security = getNodeText(inDoc, "security")

    if security != "":
        # don't append this entry unless the user provided the argument
        if not includeSecure:
            print("omitting secure entry: L-%s" % id)
            return

        if security == "usemask":
            print("including allowmask entry: L-%s" % id)

            # Create an allowmask element, falling back to "0" when the dump
            # does not carry a mask value.
            maskText = getNodeText(inDoc, "allowmask")
            if maskText == "":
                maskText = "0"
            appendTextNode(outDoc, entry, "allowmask", maskText)
        else:
            print("including private entry: L-%s" % id)

        appendTextNode(outDoc, entry, "security", security)

    # Create a taglist element
    appendTextNode(outDoc, entry, "taglist", getNodeText(inDoc, "taglist"))

    # XXXSMG: make sure there is a comment file before trying to do anything
    addCommentsForId(outDoc, entry, username, id)

    element.appendChild(entry)
def addCommentsForId(outDoc, entry, username, id):
    """Append the comments dumped for entry ``id`` to ``entry``.

    Reads the comment file ``<username>/C-<id>`` and, for every ``<comment>``
    element in it, appends a ``<comment>`` child to ``entry`` holding itemid,
    eventtime, subject, event, an ``<author>`` (name and e-mail) and, when
    present, parent_itemid.

    Args:
        outDoc: The minidom document being built.
        entry: The ``<entry>`` element that receives the comments.
        username: Directory holding the dump files.
        id: Numeric id of the entry (the part after ``C-`` in the file name).

    Returns:
        None.  Does nothing when the comment file cannot be opened.
    """
    try:
        commentFile = open("%s/C-%s" % (username, id), "r")
    except IOError:  # there are no comments for this entry
        return

    # Close the dump file as soon as it has been parsed.
    with commentFile:
        inDoc = xml.dom.minidom.parse(commentFile)

    for comment in inDoc.getElementsByTagName("comment"):
        outComment = outDoc.createElement("comment")
        entry.appendChild(outComment)

        # add the item id for the comment
        appendTextNode(outDoc, outComment, "itemid",
                       getNodeText(comment, "id"))

        # convert the time string from "YYYY-MM-DDTHH:MM:SSZ" to
        # "YYYY-MM-DD HH:MM:SS"; an undated comment gets an empty element
        timeString = getNodeText(comment, "date")
        if timeString != "":
            inDate = strptime(timeString, "%Y-%m-%dT%H:%M:%SZ")
            outDate = strftime("%Y-%m-%d %H:%M:%S", inDate)
            appendTextNode(outDoc, outComment, "eventtime", outDate)
        else:
            outComment.appendChild(outDoc.createElement("eventtime"))

        # Create a subject element
        appendTextNode(outDoc, outComment, "subject",
                       getNodeText(comment, "subject"))

        # Create an event element
        appendTextNode(outDoc, outComment, "event",
                       getNodeText(comment, "body"))

        # Create the author element
        author = outDoc.createElement("author")
        outComment.appendChild(author)

        cUser = getNodeText(comment, "user")

        appendTextNode(outDoc, author, "name", cUser)
        appendTextNode(outDoc, author, "email", cUser + "@livejournal.com")

        # Create the parent_itemid
        # NOTE(review): emitted only for comments that carry a parent id;
        # confirm the importer does not need an empty element for
        # top-level comments.
        parentId = getNodeText(comment, "parentid")
        if parentId != "":
            appendTextNode(outDoc, outComment, "parent_itemid", parentId)
176 print( "Usage: convertdump.py [arguments]" )
178 This will convert a pydump archive into something compatible with the
179 WordPress LiveJournal importer. This is the same format used by the Windows
183 -u --user username of archive to process [required]
184 -l --limit limit the number of entries in each xml file (default 250)
185 -i --insecure include private and protected entries in the output
186 -h --help show this help page
189 ./convertdump.py --user stevemartin --limit 200 --insecure
196 includeSecure = False;
198 if( len(argv) == 0 ):
203 opts, args = getopt.getopt(sys.argv[1:], "hu:l:i", ["help",
207 except getopt.GetoptError, err:
208 # print help information and exit:
209 print str(err) # will print something like "option -a not recognized"
216 elif o in ("-u", "--user"):
218 elif o in ("-l", "--limit"):
220 elif o in ("-i", "--insecure"):
221 print( "Warning: Including secure entries in XML output" )
223 elif o in ("-h", "--help"):
227 assert False, "unhandled option"
229 userDir = os.listdir(username)
234 # get the list of entries
236 if file.startswith("L-"):
237 entryNum = int(file.replace("L-",""))
239 entryArray.append(entryNum)
241 if( highNum < entryNum ):
246 # Create the minidom document
247 outDoc = xml.dom.minidom.Document()
249 # Create the <livejournal> base element
250 ljElement = outDoc.createElement("livejournal")
251 outDoc.appendChild(ljElement)
255 # start processing entries
256 for entry in entryArray:
257 addEntryForId(outDoc, ljElement, username, entry, includeSecure)
259 currentFileEntry += 1
261 if( currentFileEntry == entryLimit or entry == entryArray[-1] ):
263 f = open("%s - %s.xml" % (username, entry), "w")
264 tempXML = outDoc.toxml("UTF-8")
269 # Create the minidom document
270 outDoc = xml.dom.minidom.Document()
272 # Create the <livejournal> base element
273 ljElement = outDoc.createElement("livejournal")
274 outDoc.appendChild(ljElement)
276 if __name__ == "__main__":