# MovableType to Bitakora transformer

# [ permalink ] [ download ]
# Short module description, assigned explicitly to the module's __doc__.
__doc__ = " MT TXT 2 Bitakora XML "

# Path of the text export that main() reads.
FILENAME = "migration_ueu365.txt"

# The export is split first on the 8-dash line (one chunk per post) and
# each chunk is then split on the 5-dash line (one piece per section).
POST_DELIMITER = "--------\n"
BODY_DELIMITER = "-----\n"

from StringIO import StringIO

def main():
    """Convert the MovableType text export FILENAME into 'ueu365.xml'.

    The export is split into posts on POST_DELIMITER and each post into
    sections on BODY_DELIMITER.  The section starting with 'TITLE' is
    parsed by extractPostData, the first section starting with 'BODY'
    opens the post body, sections starting with 'COMMENT' are parsed by
    extractCommentData, and any other section seen after the body has
    started is appended to the body verbatim.

    A chunk is skipped when its post has no title or date, or when one of
    its comments has no author or date; this is also what discards the
    empty chunk that follows the last post delimiter.
    """
    # 'with' guarantees the handles are closed even if reading/writing fails.
    with open(FILENAME, 'r') as fp:
        raw_data = fp.read()

    out = StringIO()
    out.write('<posts>')

    for post_data in raw_data.split(POST_DELIMITER):
        inbody = False
        comment_data = []
        data = {}
        body = ''
        for section in post_data.split(BODY_DELIMITER):
            if section.startswith('TITLE') and not inbody:
                data = extractPostData(section)
            elif section.startswith('BODY') and not inbody:
                inbody = True
                # Drop the leading 'BODY:' marker (5 characters).
                body += section[5:]
            elif section.startswith('COMMENT'):
                # Drop the leading 'COMMENT:' marker (8 characters).
                comment_data.append(extractCommentData(section[8:]))
            elif inbody:
                body += section

        # Render the whole post before writing anything, so that a chunk
        # with a missing required field leaves no partial <post> behind.
        try:
            post_xml = _render_post(data, body, comment_data)
        except KeyError:
            continue
        out.write(post_xml)

    out.write('</posts>')
    with open('ueu365.xml', 'w') as fp:
        fp.write(out.getvalue())


def _cdata(text):
    """Wrap text in a CDATA section, splitting any ']]>' it contains."""
    # A literal ']]>' would close the section early; the standard remedy
    # is to end the section after ']]' and reopen it before '>'.
    return '<![CDATA[%s]]>' % text.replace(']]>', ']]]]><![CDATA[>')


def _render_comment(comment):
    """Return the <comment> element; KeyError if author or date is missing."""
    from xml.sax.saxutils import escape

    return ''.join([
        '<comment>',
        '<author>%s</author>' % escape(comment['author']),
        '<body>%s</body>' % _cdata(comment['body']),
        '<url>%s</url>' % escape(comment.get('url', '')),
        '<email>%s</email>' % escape(comment.get('email', '')),
        '<date>%s</date>' % escape(comment['date']),
        '</comment>',
    ])


def _render_post(data, body, comments):
    """Return the <post> element; KeyError if a required field is missing."""
    from xml.sax.saxutils import escape

    chunks = [
        '<post>',
        # Text nodes are escaped so '&', '<' and '>' cannot break the XML.
        '<title>%s</title>' % escape(data['title']),
        '<author>%s</author>' % escape(data['author']),
        '<body>%s</body>' % _cdata(body),
        # Tags are optional: a post without a CATEGORY line is still a post.
        '<tags>%s</tags>' % escape(data.get('tags', '')),
        '<date>%s</date>' % escape(data['date']),
        '<comments>',
    ]
    chunks.extend(_render_comment(comment) for comment in comments)
    chunks.append('</comments>')
    chunks.append('</post>')
    return ''.join(chunks)

def extractPostData(data, author='ueu365'):
    """Parse the metadata section of a post into a dict.

    Args:
        data: the text of the section, one 'KEY: value' entry per line.
        author: value stored under 'author'; defaults to 'ueu365'.

    Returns:
        A dict that always has 'author', plus 'title' and 'date' when a
        'TITLE:' / 'DATE:' line is present, and 'tags' when at least one
        'CATEGORY:' line is present.  'tags' holds every category in
        order of appearance, each followed by ';' (e.g. 'a;b;').
        Lines with any other prefix are ignored.
    """
    d = {'author': author}
    tags = []
    for line in data.split('\n'):
        # Slice off the prefix instead of splitting on it: splitting would
        # truncate a value that happens to contain the prefix text again.
        if line.startswith('TITLE:'):
            d['title'] = line[len('TITLE:'):].strip()
        elif line.startswith('DATE:'):
            d['date'] = line[len('DATE:'):].strip()
        elif line.startswith('CATEGORY:'):
            tags.append(line[len('CATEGORY:'):].strip())

    if tags:
        d['tags'] = ''.join(tag + ';' for tag in tags)
    return d
            
    

def extractCommentData(data):
    """Parse one comment section into a dict.

    Args:
        data: the text of the comment section with its leading
            'COMMENT:' marker already removed.

    Returns:
        A dict that always has 'body', plus 'author', 'email', 'url' and
        'date' for each matching 'AUTHOR:' / 'EMAIL:' / 'URL:' / 'DATE:'
        line that is present.  'IP:' lines are discarded.  Every other
        line belongs to the body; body lines keep their line breaks, and
        blank lines at the very start and end of the body are dropped.
    """
    fields = (
        ('AUTHOR:', 'author'),
        ('EMAIL:', 'email'),
        ('URL:', 'url'),
        ('DATE:', 'date'),
    )
    d = {}
    body_lines = []
    for line in data.split('\n'):
        if line.startswith('IP:'):
            continue
        for prefix, key in fields:
            if line.startswith(prefix):
                # Slice off the prefix rather than splitting on it, so a
                # value containing the prefix text (e.g. a URL with
                # 'URL:' in its query string) is not truncated.
                d[key] = line[len(prefix):].strip()
                break
        else:
            body_lines.append(line)

    # Re-join with the newline that split() consumed; concatenating the
    # lines directly would fuse the last word of one line with the first
    # word of the next.
    d['body'] = '\n'.join(body_lines).strip('\n')
    return d

# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
# hits counter