# Converter script: "MT TXT" export -> Bitakora XML
# (MT is presumably Movable Type -- confirm against the export's origin).
__doc__ = ''' MT TXT 2 Bitakora XML '''

# Path of the text export that main() reads by default.
FILENAME = 'migration_ueu365.txt'
# Line that separates consecutive posts in the export.
POST_DELIMITER = '--------\n'
# Line that separates the sections (metadata, body, comments) of one post.
BODY_DELIMITER = '-----\n'
from StringIO import StringIO
def main():
fp = open(FILENAME, 'r')
raw_data = fp.read()
fp.close()
inbody = False
out = StringIO()
out.write('<posts>')
for post_data in raw_data.split(POST_DELIMITER):
inbody = False
comment_data = []
data = {}
body = ''
for line in post_data.split(BODY_DELIMITER):
if line.startswith('TITLE') and not inbody:
data = extractPostData(line)
elif line.startswith('BODY') and not inbody:
inbody = True
body += line[5:]
elif line.startswith('COMMENT'):
comment_data.append(extractCommentData(line[8:]))
elif inbody:
body += line
try:
out2 = StringIO()
out2.write('<post>')
out2.write('<title>%s</title>' % data['title'])
out2.write('<author>%s</author>' % data['author'])
out2.write('<body><![CDATA[%s]]></body>' % body)
out2.write('<tags>%s</tags>' % data['tags'])
out2.write('<date>%s</date>' % data['date'])
out2.write('<comments>')
for comment in comment_data:
out2.write('<comment>')
out2.write('<author>%s</author>' % comment['author'])
out2.write('<body><![CDATA[%s]]></body>' % comment['body'])
out2.write('<url>%s</url>' % comment.get('url', ''))
out2.write('<email>%s</email>' % comment.get('email', ''))
out2.write('<date>%s</date>' % comment['date'])
out2.write('</comment>')
out2.write('</comments>')
out2.write('</post>')
except KeyError:
continue
out.write(out2.getvalue())
out.write('</posts>')
fp = open('ueu365.xml', 'w')
fp.write(out.getvalue())
fp.close()
def extractPostData(data):
    """Parse the metadata section of one post into a dict.

    Args:
        data: the section text, one 'KEY: value' header per line.

    Returns:
        A dict with:
          'title'  -- value of the 'TITLE:' line (only if present),
          'date'   -- value of the 'DATE:' line (only if present),
          'tags'   -- every 'CATEGORY:' value, each followed by ';'
                      (only if at least one such line is present),
          'author' -- always the fixed string 'ueu365'.
        Lines with any other prefix are ignored.
    """
    post = {}
    tags = []
    for line in data.split('\n'):
        # Slice the prefix off instead of splitting on it, so a value that
        # happens to contain the prefix text again is not truncated.
        if line.startswith('TITLE:'):
            post['title'] = line[len('TITLE:'):].strip()
        elif line.startswith('DATE:'):
            post['date'] = line[len('DATE:'):].strip()
        elif line.startswith('CATEGORY:'):
            tags.append(line[len('CATEGORY:'):].strip())
    if tags:
        # Each tag is terminated (not separated) by ';', e.g. 'a;b;'.
        post['tags'] = ''.join(tag + ';' for tag in tags)
    post['author'] = 'ueu365'
    return post
def extractCommentData(data):
    """Parse one comment section into a dict.

    Args:
        data: the comment text with its leading 'COMMENT:' marker already
            removed; header lines ('AUTHOR:', 'EMAIL:', 'URL:', 'DATE:',
            'IP:') and free-text body lines.

    Returns:
        A dict with 'author', 'email', 'url' and 'date' for each header
        that is present, and always 'body': the remaining lines joined
        with newlines, without leading or trailing line breaks.  'IP:'
        lines are discarded.

    Note: a body line that itself starts with one of the header prefixes
    is treated as a header, not as body text.
    """
    headers = (
        ('AUTHOR:', 'author'),
        ('EMAIL:', 'email'),
        ('URL:', 'url'),
        ('DATE:', 'date'),
    )
    comment = {}
    body_lines = []
    for line in data.split('\n'):
        if line.startswith('IP:'):
            continue
        for prefix, key in headers:
            if line.startswith(prefix):
                # Slice the prefix off rather than splitting on it, so a
                # value containing the prefix text (e.g. inside a URL) is
                # kept whole.
                comment[key] = line[len(prefix):].strip()
                break
        else:
            body_lines.append(line)
    # Join with newlines so words on adjacent lines do not run together;
    # trim the blank lines left by the marker line and the section end.
    comment['body'] = '\n'.join(body_lines).strip('\r\n')
    return comment
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()