# Source listing: grounding-dino/pdf_converter.py
# Listing timestamp: 2025-08-16 21:16:58 +00:00
# 47 lines, 1.4 KiB, Python

import numpy as np
from pdf2image import convert_from_bytes, convert_from_path
from PIL import Image
class PdfConverter:
    """Convert PDF documents to PIL images and offer small image helpers.

    PDF rendering is delegated to ``pdf2image`` (``convert_from_path`` and
    ``convert_from_bytes``); resizing and saving go through Pillow, and the
    OpenCV-style array conversion goes through NumPy.
    """

    def __init__(self, dpi: int):
        """Remember the rendering resolution.

        Args:
            dpi: Value forwarded as the ``dpi`` keyword to every pdf2image
                conversion call made by this instance.
        """
        # Not read by any method of this class; retained only so code that
        # inspects the attribute from outside keeps working.
        self._pdfium_initialized = False
        self._dpi = dpi

    def convert_pdf_to_jpg(self, file_path: str) -> list[Image.Image]:
        """Render the PDF stored at ``file_path`` into PIL images.

        Despite the method name, nothing is encoded as JPEG or written to
        disk here: the result is whatever ``pdf2image.convert_from_path``
        returns for the configured DPI (per its documentation, one image
        per page).

        Args:
            file_path: Path of the PDF file to render.

        Returns:
            The list of PIL images produced by pdf2image.
        """
        return convert_from_path(file_path, dpi=self._dpi)

    def resize_image(self, img: Image.Image, size: tuple[int, int]) -> Image.Image:
        """Return ``img`` resized to ``size`` using Lanczos resampling.

        Args:
            img: Image to resize.
            size: Target size, passed unchanged to ``Image.resize``
                (Pillow documents it as ``(width, height)``).

        Returns:
            The image returned by ``img.resize``.
        """
        return img.resize(size, Image.LANCZOS)

    @staticmethod
    def save_image_as_png(img: Image.Image, file_path: str) -> None:
        """Write ``img`` to ``file_path`` in PNG format.

        The format is forced to PNG regardless of the extension carried by
        ``file_path``.
        """
        img.save(file_path, format="PNG")

    @staticmethod
    def to_cv2_image(img: Image.Image) -> np.ndarray:
        """Convert a PIL image to an array with OpenCV-style BGR channels.

        The image is first converted to RGB mode, turned into a NumPy
        array, and its last axis is reversed (RGB -> BGR).

        Returns:
            A freshly allocated array holding the channel-reversed pixels.
        """
        rgb_pixels = np.array(img.convert("RGB"))
        # Reversing the last axis yields a negatively-strided view; copy()
        # materialises it as an independent, contiguous array.
        return rgb_pixels[:, :, ::-1].copy()

    def convert_pdf_bytes_to_jpg(self, pdf_bytes: bytes) -> list[Image.Image]:
        """Render an in-memory PDF into PIL images.

        Despite the method name, nothing is encoded as JPEG here: the
        result is whatever ``pdf2image.convert_from_bytes`` returns for the
        configured DPI (per its documentation, one image per page).

        Args:
            pdf_bytes: Raw contents of the PDF document.

        Returns:
            The list of PIL images produced by pdf2image.
        """
        return convert_from_bytes(pdf_bytes, dpi=self._dpi)