Sometimes you need to convert a PDF to an image. This gist shows how to convert a PDF stored in Azure Blob Storage to JPEG, using an Azure Function that runs in a Docker container. The Python script uses the pdf2image library: https://github.com/Belval/pdf2image
import os, io | |
import logging | |
from PIL import Image | |
import azure.functions as func | |
import tempfile | |
from azure.storage.blob import BlockBlobService | |
from pdf2image import convert_from_path, convert_from_bytes | |
# Storage settings are read from the environment once, at import time; a
# missing variable raises KeyError right here. In main(), BlobAccount and
# BlobKey are the storage account name and key, and BlobItem is the
# container name.
BlobAccount, BlobKey, BlobItem = (
    os.environ[setting] for setting in ("BlobAccount", "BlobKey", "BlobItem")
)
def main(myblob: func.InputStream) -> None:
    """Convert the PDF in ``myblob`` to one JPEG per page and upload them.

    The blob content is rendered at 600 dpi with pdf2image. Each page is
    saved as ``<page number>.jpg`` (numbering starts at 1) and uploaded to
    the container named by ``BlobItem``.

    All intermediate files live in a temporary directory, so nothing is
    written to the current working directory and everything is removed
    even if conversion or upload raises.

    Args:
        myblob: Input stream of the blob to convert.

    NOTE(review): uploaded blob names depend only on the page number, so
    pages from different PDFs share names in the container -- confirm that
    this is the intended behaviour.
    """
    logging.info(f"Python blob trigger function processed blob \n"
                 f"Name: {myblob.name}\n"
                 f"Blob Size: {myblob.length} bytes")

    pdf_bytes = myblob.read()
    blob_service = BlockBlobService(account_name=BlobAccount, account_key=BlobKey)

    with tempfile.TemporaryDirectory() as workdir:
        # Convert straight from memory; pdf2image keeps its intermediate
        # page files in workdir, so the pages must be used inside this block.
        pages = convert_from_bytes(pdf_bytes, dpi=600, output_folder=workdir)
        for page_number, page in enumerate(pages, start=1):
            blob_filename = f"{page_number}.jpg"
            local_path = os.path.join(workdir, blob_filename)
            page.save(local_path, "JPEG")
            blob_service.create_blob_from_path(BlobItem, blob_filename, local_path)
If you want to know more about our Cloud offers please click here