Thursday, August 1, 2019

Convert PDF to Images using Python

We will use the pdf2image library.

How to Install?

for Linux

sudo apt-get install poppler-utils
pip3 install pdf2image
pip3 install pillow

for Mac
brew install poppler
pip3 install pdf2image
pip3 install pillow


Sample Application 1
Convert all PDF files in a given directory to images

# Search for the directory that contains pdfinfo and set it as poppler_path.
# In my case pdfinfo was at /usr/local/bin/pdfinfo, so poppler_path is /usr/local/bin/



from pdf2image import convert_from_path

def ConvertPdfFile2Image(PDFfileName, dpi=500, poppler_path='/usr/local/bin/'):
    """Render every page of a PDF and save each page as a JPEG file.

    Page ``i`` (zero-based) of ``report.pdf`` is written to
    ``report<i>.jpeg``, i.e. next to the source file.

    Args:
        PDFfileName: Path to the PDF file to convert.
        dpi: Rendering resolution handed to pdf2image.
        poppler_path: Directory that contains the poppler binaries
            (e.g. ``pdfinfo``).
    """
    import os

    # Options are passed by keyword: the positional order of pdf2image's
    # optional parameters is not stable across releases, so positional
    # passing can silently assign values to the wrong parameter.
    pages = convert_from_path(
        PDFfileName,
        dpi=dpi,
        fmt='ppm',
        thread_count=1,
        poppler_path=poppler_path,
    )

    # splitext() removes whatever extension is present instead of assuming
    # the name ends with exactly four characters such as ".pdf".
    base_name = os.path.splitext(PDFfileName)[0]
    for page_number, page in enumerate(pages):
        page.save(base_name + str(page_number) + '.jpeg', 'JPEG')



import os
# Directory whose PDF files will be converted.
pdf_dir = r"/Users/rafie/Desktop/"
# Work from inside that directory so the bare names returned by
# os.listdir() resolve when they are handed to the converter.
os.chdir(pdf_dir)

for pdf_file in os.listdir(pdf_dir):
    # Skip every entry that is not a PDF.
    if not pdf_file.endswith(".pdf"):
        continue
    ConvertPdfFile2Image(pdf_file)
     





Sample Application 2
Convert an online PDF to images saved to a local path


from pdf2image import convert_from_path
import requests
def ConvertOnlinePdf2Images(pdfFileURL, outputPath, dpi=500,
                            poppler_path='/usr/local/bin/', timeout=30):
    """Download a PDF and save each of its pages as a JPEG file.

    Page ``i`` (zero-based) is written to ``outputPath + str(i) + '.jpeg'``,
    so ``outputPath`` acts as a file-name prefix (include a trailing path
    separator to write into a directory).

    Args:
        pdfFileURL: URL of the PDF to download.
        outputPath: Prefix used to build each output file name.
        dpi: Rendering resolution handed to pdf2image.
        poppler_path: Directory that contains the poppler binaries
            (e.g. ``pdfinfo``).
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # Imported here because this function needs convert_from_bytes, which
    # is a different name from convert_from_path.
    from pdf2image import convert_from_bytes

    response = requests.get(pdfFileURL, timeout=timeout)
    # Fail early on 4xx/5xx instead of feeding an error page to poppler.
    response.raise_for_status()

    # Options are passed by keyword: the positional order of pdf2image's
    # optional parameters is not stable across releases.
    pages = convert_from_bytes(
        response.content,
        dpi=dpi,
        fmt='ppm',
        thread_count=1,
        poppler_path=poppler_path,
    )

    for page_number, page in enumerate(pages):
        page.save(outputPath + str(page_number) + '.jpeg', 'JPEG')











Sample Application 3
How to get binary image directly from pdf2image

      try:
         images = pdf2image.convert_from_path(fs.path(filename))
         response = HttpResponse(content_type="image/png")
         images[0].save(response, "png")
         return response
      except:   #return empty image
         error_image = Image.new('RGBA', (1, 1), (255,0,0,0))
         response = HttpResponse(content_type="image/jpeg")
         error_image.save(response, "JPEG")
         return response
How to view the image in an HTML page:
    <img src="/send_image?file={{img}}" width="200" height="200">

No comments: