from transformers import Blip2Processor, Blip2ForConditionalGeneration
import torch
from PIL import Image

from ..process import pad_image

# There are some issues with the Hugging Face version of the BLIP2-opt model.
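
# pad_image (imported from the sibling process module, not shown here) is
# assumed to letterbox the image to the target size while preserving the
# aspect ratio, roughly along these lines (a hypothetical sketch, not the
# actual implementation):
#
#     def pad_image(image, size):
#         from PIL import ImageOps
#         return ImageOps.pad(image, size)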

class BLIP2:
    def __init__(self, model_path, device="cuda") -> None:
        self.processor = Blip2Processor.from_pretrained(model_path)
        # Load the model in float16 to halve GPU memory use.
        self.model = Blip2ForConditionalGeneration.from_pretrained(
            model_path, torch_dtype=torch.float16).to(device)
        self.model.eval()
        self.device = device

    def generate(self, image, question, pad=True):
        """Answer a question about an image; `image` is anything PIL.Image.open accepts."""
        prompt = f"Question: {question} Answer:"
        image = Image.open(image)
        if pad:
            # Pad the image to the 224x224 input size expected by the vision encoder.
            image = pad_image(image, (224, 224))
        inputs = self.processor(images=image, text=prompt,
                                return_tensors="pt").to(self.device, torch.float16)
        generated_ids = self.model.generate(**inputs)
        generated_text = self.processor.batch_decode(
            generated_ids, skip_special_tokens=True)[0].strip()
        return generated_text
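

# Minimal usage sketch. The checkpoint name and image path below are
# hypothetical placeholders; any BLIP2-opt checkpoint compatible with
# Blip2ForConditionalGeneration should work. Because this module uses a
# relative import, run it with `python -m <package>.<module>` rather than
# as a standalone script.
if __name__ == "__main__":
    vqa = BLIP2("Salesforce/blip2-opt-2.7b")
    print(vqa.generate("photo.jpg", "How many dogs are in the picture?"))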