www.wagner.pp.ru Git - oss/ljdump.git/blobdiff - ljdump.py
automatically fall back to Latin-1 for old entries that aren't UTF-8
[oss/ljdump.git] / ljdump.py
index a6d7db39ec0bc166639d87b8324ccc393332e9b7..6a7e3a2655ff67d88814066c10332d17ef36c788 100755 (executable)
--- a/ljdump.py
+++ b/ljdump.py
@@ -74,7 +74,11 @@ def dumpelement(f, name, e):
         if isinstance(e[k], {}.__class__):
             dumpelement(f, k, e[k])
         else:
-            s = unicode(str(e[k]), "UTF-8")
+            try:
+                s = unicode(str(e[k]), "UTF-8")
+            except UnicodeDecodeError:
+                # fall back to cp1252 (Windows-1252, the Windows variant of Latin-1) for old entries that aren't UTF-8
+                s = unicode(str(e[k]), "cp1252")
             f.write("<%s>%s</%s>\n" % (k, saxutils.escape(s), k))
     f.write("</%s>\n" % name)