GLM-OCR

+import torch
+from PIL import Image
+from transformers import AutoProcessor, AutoModelForVision2Seq
+import io
+import base64
class EndpointHandler():
    """Callable handler that extracts receipt fields from a base64-encoded image.

    The handler loads a processor and a vision-to-sequence model from ``path``
    once at construction time, then answers each request by decoding the
    image, running generation with a fixed instruction prompt, and returning
    the decoded text.
    """

    # Instruction sent to the model together with every image.
    PROMPT = "Identify Date, Vendor, and list every Item with description, qty, and price. Return as JSON."
    # Upper bound on the number of tokens generated per request.
    MAX_NEW_TOKENS = 1024

    def __init__(self, path=""):
        """Load the processor and model stored at ``path``.

        Args:
            path: Directory or identifier passed to ``from_pretrained``.
        """
        self.processor = AutoProcessor.from_pretrained(path, trust_remote_code=True)
        self.model = AutoModelForVision2Seq.from_pretrained(
            path,
            trust_remote_code=True,
            device_map="auto",
            torch_dtype=torch.bfloat16,
        )
        # Inference only: switch off dropout and other training-time behaviour.
        self.model.eval()

    def __call__(self, data):
        """Run the model on the image carried by ``data``.

        Args:
            data: Request dict whose ``"inputs"`` entry holds the image as a
                base64 string (optionally with a ``data:...;base64,`` header)
                or base64 bytes. A bare string/bytes payload is accepted too.
                The dict is not modified.

        Returns:
            A one-element list ``[{"generated_text": <model output>}]``.

        Raises:
            TypeError: If the payload is missing or is not str/bytes.
            ValueError: If the payload is empty or is not valid base64.
        """
        # Validate and decode first so bad requests fail before any model work.
        image_bytes = self._decode_payload(data)
        with Image.open(io.BytesIO(image_bytes)) as raw_image:
            image = raw_image.convert("RGB")

        # NOTE(review): the prompt is passed as plain text. Some vision-language
        # processors expect image placeholder tokens inserted through a chat
        # template instead -- confirm against the model card.
        model_inputs = self.processor(
            text=self.PROMPT, images=image, return_tensors="pt"
        ).to(self.model.device)

        with torch.no_grad():
            generated_ids = self.model.generate(
                **model_inputs, max_new_tokens=self.MAX_NEW_TOKENS
            )

        # Decoder-only models return the prompt tokens followed by the new
        # tokens; drop the prompt so the caller receives only the answer.
        # Encoder-decoder models already return just the generated sequence.
        prompt_len = 0
        is_encoder_decoder = getattr(self.model.config, "is_encoder_decoder", False)
        if not is_encoder_decoder and "input_ids" in model_inputs:
            prompt_len = model_inputs["input_ids"].shape[1]
        new_tokens = generated_ids[:, prompt_len:]

        result = self.processor.batch_decode(new_tokens, skip_special_tokens=True)[0]
        return [{"generated_text": result}]

    @staticmethod
    def _decode_payload(data):
        """Return the raw image bytes encoded in ``data`` without mutating it."""
        payload = data.get("inputs") if isinstance(data, dict) else data
        if not isinstance(payload, (str, bytes, bytearray)):
            raise TypeError(
                "'inputs' must be a base64-encoded image (str or bytes), "
                f"got {type(payload).__name__}"
            )

        if isinstance(payload, str):
            payload = payload.strip()
            if payload.startswith("data:"):
                # Data URI: everything up to the first comma is a header that
                # would otherwise be folded into the decoded bytes.
                _, comma, payload = payload.partition(",")
                if not comma:
                    raise ValueError("malformed data URI: missing ',' separator")

        if not payload:
            raise ValueError("'inputs' is empty; expected a base64-encoded image")

        # binascii.Error raised here is a ValueError subclass.
        image_bytes = base64.b64decode(payload)
        if not image_bytes:
            raise ValueError("'inputs' decoded to zero bytes")
        return image_bytes