| from colpali_engine.models import ColPali |
| from colpali_engine.models.paligemma.colpali.processing_colpali import ColPaliProcessor |
| from colpali_engine.utils.processing_utils import BaseVisualRetrieverProcessor |
| from colpali_engine.utils.torch_utils import ListDataset, get_torch_device |
| from torch.utils.data import DataLoader |
| import torch |
| from typing import List, cast |
|
|
| |
|
|
| from tqdm import tqdm |
| from PIL import Image |
| import os |
|
|
| import spaces |
|
|
|
|
| |
# Ask PyTorch to drop its cached CUDA allocations before the model is loaded.
torch.cuda.empty_cache()

import dotenv

# Locate a .env file and load its entries into the process environment.
dotenv_file = dotenv.find_dotenv()
dotenv.load_dotenv(dotenv_file)

# The model identifier is read from the "colpali" environment variable;
# indexing os.environ raises KeyError when the variable is not set.
model_name = os.environ['colpali']
device = get_torch_device("cuda")

# Local cache locations, both placed under the current working directory:
#   <cwd>/<model_name>            -> saved model
#   <cwd>/<model_name>_processor  -> saved processor
current_working_directory = os.getcwd()
save_directory = os.path.join(current_working_directory, model_name)
processor_directory = os.path.join(
    current_working_directory,
    model_name + '_processor',
)
|
|
|
|
|
|
# Load the model and processor, using the local cache when it is available.
#
# The cache is treated as usable only when BOTH directories exist. Checking
# only the model directory would crash on the processor load if an earlier
# run was interrupted between the two saves.
if os.path.isdir(save_directory) and os.path.isdir(processor_directory):
    # Use the same dtype as the first-run path below so that cached and
    # freshly downloaded models run at the same precision.
    # No device_map here: ColpaliManager's methods move the model to CUDA
    # before each use.
    model = ColPali.from_pretrained(
        save_directory,
        torch_dtype=torch.bfloat16,
    ).eval()
    # NOTE(review): use_fast=True is only passed on this cached path, not on
    # the first-run path below -- confirm whether that difference is intended.
    processor = cast(
        ColPaliProcessor,
        ColPaliProcessor.from_pretrained(processor_directory, use_fast=True),
    )
else:
    model = ColPali.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,
        device_map=device,
    ).eval()
    processor = cast(ColPaliProcessor, ColPaliProcessor.from_pretrained(model_name))

    # exist_ok=True: one of the two directories may be left over from an
    # interrupted earlier run; that must not abort re-creating the cache.
    os.makedirs(save_directory, exist_ok=True)
    print(f"Directory '{save_directory}' created.")
    model.save_pretrained(save_directory)
    os.makedirs(processor_directory, exist_ok=True)
    processor.save_pretrained(processor_directory)
|
|
|
|
class ColpaliManager:
    """Embed images and text queries with the module-level ColPali model.

    Every method works on the module-level ``model`` and ``processor``
    objects; instances hold no state of their own.
    """

    def __init__(self, device = "cuda", model_name = model_name):
        """Log the requested configuration.

        Args:
            device: Only printed; the methods always move the module-level
                model to ``"cuda"``.
            model_name: Only printed; the model actually used is the one
                loaded at module import time.
        """
        print(f"Initializing ColpaliManager with device {device} and model {model_name}")

    def get_images(self, paths: list[str]) -> List[Image.Image]:
        """Open every path in ``paths`` as a PIL image, in order.

        This only opens files, so it neither requests a GPU nor touches the
        model.

        Args:
            paths: Image file paths.

        Returns:
            One ``Image.open`` result per path.
        """
        return [Image.open(path) for path in paths]

    def _embed(self, dataset, collate_fn, batch_size, show_progress=False):
        """Run the model over ``dataset`` in batches and collect the outputs.

        Args:
            dataset: Dataset of raw items (images or strings).
            collate_fn: Callable turning a list of raw items into a mapping
                of model inputs (a processor method).
            batch_size: Number of items per forward pass.
            show_progress: Wrap the loader in ``tqdm`` when true.

        Returns:
            A list with one float32 NumPy array per input item, in order.
        """
        loader = DataLoader(
            dataset=dataset,
            batch_size=batch_size,
            shuffle=False,
            collate_fn=collate_fn,
        )
        batches = tqdm(loader) if show_progress else loader

        results = []
        with torch.no_grad():
            for batch in batches:
                inputs = {key: value.to(model.device) for key, value in batch.items()}
                outputs = model(**inputs)
                # Convert each item to a CPU NumPy array straight away so that
                # finished batches do not pile up in GPU memory.
                results.extend(
                    item.float().cpu().numpy() for item in torch.unbind(outputs)
                )
        return results

    @spaces.GPU
    def process_images(self, image_paths:list[str], batch_size=5):
        """Embed the images stored at ``image_paths``.

        Moves the module-level model to CUDA and leaves it there afterwards.
        NOTE(review): ``process_text`` moves the model back to CPU when it
        finishes while this method does not -- confirm this is intended.

        Args:
            image_paths: Image file paths.
            batch_size: Number of images per forward pass.

        Returns:
            A list with one float32 NumPy array per image, in input order.
        """
        model.to("cuda")
        print(f"Processing {len(image_paths)} image_paths")

        images = self.get_images(image_paths)
        return self._embed(
            ListDataset[Image.Image](images),
            processor.process_images,
            batch_size,
            show_progress=True,
        )

    @spaces.GPU
    def process_text(self, texts: list[str], batch_size=5):
        """Embed the query strings in ``texts``.

        Moves the module-level model to CUDA for the computation and back to
        CPU once all queries are embedded.

        Args:
            texts: Query strings.
            batch_size: Number of queries per forward pass.

        Returns:
            A list with one float32 NumPy array per query, in input order.
        """
        model.to("cuda")
        print(f"Processing {len(texts)} texts")

        qs_np = self._embed(
            ListDataset[str](texts),
            processor.process_queries,
            batch_size,
        )
        model.to("cpu")

        return qs_np
| |
|
|
|
|
|
|