# Convert the first page of a bunch of PDF files to JPG images

# [ permalink ] [ download ]
#!/usr/bin/python

# Module-level help text: lists the system packages to install before
# running this script.
__doc__ = """

Previous requisites:

  * sudo apt-get install netpbm poppler-utils

"""



import os
import subprocess

def _run(argv, stdout=None):
    """Run *argv* without a shell and report whether it exited with status 0.

    Passing an argument list (instead of a shell string) means file names
    containing spaces or shell metacharacters are handed to the tool verbatim.
    A tool that cannot be started (missing, not executable) is reported and
    treated as a failure instead of raising, so the rest of the batch still
    runs.
    """
    try:
        return subprocess.call(argv, stdout=stdout) == 0
    except OSError as exc:
        print('cannot run %s: %s' % (argv[0], exc))
        return False


def _first_page_ppm(base):
    """Return the name of the '<base>-<digits>.ppm' file in the cwd, or None.

    The exact page suffix written by pdftoppm is not predicted here (it may
    be zero-padded), so the directory is scanned for it.  Requiring the part
    between '<base>-' and '.ppm' to be all digits keeps 'a.pdf' from picking
    up a page that belongs to 'a-b.pdf'.
    """
    prefix = base + '-'
    suffix = '.ppm'
    for name in sorted(os.listdir('.')):
        page = name[len(prefix):-len(suffix)]
        if name.startswith(prefix) and name.endswith(suffix) and page.isdigit():
            return name
    return None


def _convert_first_page(filename, base):
    """Render page 1 of *filename* to '<base>.jpg'; return True on success.

    Any intermediate PPM is removed, and a JPG left incomplete by a failed
    conversion is deleted so no empty output file stays behind.
    """
    # The './' prefix keeps names starting with '-' from being read as options.
    pdf_path = os.path.join(os.curdir, filename)
    ppm_root = os.path.join(os.curdir, base)
    if not _run(['pdftoppm', '-f', '1', '-l', '1', pdf_path, ppm_root]):
        return False

    ppm_name = _first_page_ppm(base)
    if ppm_name is None:
        return False

    jpg_name = base + '.jpg'
    # ppmtojpeg writes the image to stdout; send it straight into the JPG.
    with open(jpg_name, 'wb') as jpg_file:
        converted = _run(['ppmtojpeg', os.path.join(os.curdir, ppm_name)],
                         stdout=jpg_file)
    os.remove(ppm_name)
    if not converted:
        os.remove(jpg_name)
    return converted


def main():
    """Convert the first page of every '*.pdf' in the current directory to JPG.

    For each 'name.pdf' the result is written to 'name.jpg' next to it.  Only
    files whose extension is exactly '.pdf' are processed.  One line is
    printed per file: '<file> done' on success, '<file> failed' otherwise; a
    failure never stops the remaining files from being processed.
    """
    for filename in os.listdir('.'):
        base, extension = os.path.splitext(filename)
        if extension != '.pdf':
            continue
        if _convert_first_page(filename, base):
            print('%s done' % filename)
        else:
            print('%s failed' % filename)


# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
# hits counter