rgr4y committed on
Commit
4d921db
·
1 Parent(s): 9c5a20a

CPU fallback

Browse files
Files changed (1) hide show
  1. app.py +47 -19
app.py CHANGED
@@ -3,15 +3,27 @@ import gradio as gr
3
  import pandas as pd
4
  from PIL import Image
5
  import fitz # PyMuPDF
 
6
  from transformers import pipeline
7
 
8
- # Pick a lightweight doc classifier. Swap to your preferred HF model.
9
- MODEL_ID = os.getenv("MODEL_ID", "HAMMALE/vit-tiny-classifier-rvlcdip")
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  clf = pipeline(
12
  task="image-classification",
13
- model=MODEL_ID,
14
- device=0 if os.getenv("CUDA_VISIBLE_DEVICES") not in (None, "", "-1") else -1,
15
  )
16
 
17
  def pdf_to_images(pdf_path: str, max_pages: int = 6, dpi: int = 150):
@@ -33,22 +45,27 @@ def run_infer(file_obj, max_pages: int = 6, top_k: int = 5):
33
 
34
  if ext == ".pdf":
35
  images = pdf_to_images(path, max_pages=max_pages)
36
- page_labels = [f"page_{i+1}" for i in range(len(images))]
37
  else:
38
  images = [Image.open(path).convert("RGB")]
39
- page_labels = ["image"]
40
 
41
  rows = []
42
- # Aggregate by summing scores per label across pages (simple + robust)
43
- agg = {}
44
 
45
- for label, img in zip(page_labels, images):
46
  preds = clf(img, top_k=top_k)
47
  for p in preds:
48
- rows.append({"item": label, "label": p["label"], "score": float(p["score"])})
49
- agg[p["label"]] = agg.get(p["label"], 0.0) + float(p["score"])
 
 
50
 
51
- per_page = pd.DataFrame(rows).sort_values(["item", "score"], ascending=[True, False])
 
 
 
 
52
 
53
  agg_df = (
54
  pd.DataFrame([{"label": k, "score_sum": v} for k, v in agg.items()])
@@ -57,22 +74,33 @@ def run_infer(file_obj, max_pages: int = 6, top_k: int = 5):
57
  .reset_index(drop=True)
58
  )
59
 
60
- return per_page, agg_df
 
 
 
 
 
 
 
61
 
62
  demo = gr.Interface(
63
  fn=run_infer,
64
  inputs=[
65
- gr.File(label="Upload PDF/PNG/JPG"),
66
- gr.Slider(1, 30, value=6, step=1, label="Max PDF pages"),
67
  gr.Slider(1, 20, value=5, step=1, label="Top-K labels"),
68
  ],
69
  outputs=[
70
- gr.Dataframe(label="Per-page predictions"),
 
71
  gr.Dataframe(label="Aggregated across pages (sum of scores)"),
72
  ],
73
- title="Document Classifier (PDF/PNG)",
74
- description=f"Model: {MODEL_ID}. Upload a PDF or image to classify document type.",
 
 
 
75
  )
76
 
77
  if __name__ == "__main__":
78
- demo.launch()
 
3
  import pandas as pd
4
  from PIL import Image
5
  import fitz # PyMuPDF
6
+ import torch
7
  from transformers import pipeline
8
 
9
# ---------------------------------------------------------------------------
# Device / model selection: prefer the larger GPU model when CUDA is usable,
# otherwise fall back to a lightweight CPU model.
# ---------------------------------------------------------------------------
GPU_MODEL_ID = os.getenv("GPU_MODEL_ID", "microsoft/dit-base-finetuned-rvlcdip")
CPU_MODEL_ID = os.getenv("CPU_MODEL_ID", "HAMMALE/vit-tiny-classifier-rvlcdip")

# Optional override: set FORCE_CPU=1 in Space variables to force CPU inference
# even when CUDA is available. Common truthy spellings are accepted as well,
# so FORCE_CPU=true / yes / on no longer silently fall through to the GPU.
FORCE_CPU = os.getenv("FORCE_CPU", "0").strip().lower() in {"1", "true", "yes", "on"}


def pick_device_and_model() -> tuple[int, str, str]:
    """Choose the inference device and the matching model checkpoint.

    Returns:
        A ``(device, model_id, backend)`` triple where ``device`` is the
        ``transformers`` pipeline device index (``0`` = first GPU, ``-1`` =
        CPU), ``model_id`` is the Hugging Face model to load, and ``backend``
        is ``"cuda"`` or ``"cpu"`` (used for the runtime-info table in the UI).
    """
    if torch.cuda.is_available() and not FORCE_CPU:
        return 0, GPU_MODEL_ID, "cuda"
    return -1, CPU_MODEL_ID, "cpu"


DEVICE, ACTIVE_MODEL_ID, ACTIVE_BACKEND = pick_device_and_model()
22
 
23
# Build the image-classification pipeline once at startup; DEVICE and
# ACTIVE_MODEL_ID come from pick_device_and_model() above.
clf = pipeline(
    "image-classification",
    model=ACTIVE_MODEL_ID,
    device=DEVICE,
)
28
 
29
  def pdf_to_images(pdf_path: str, max_pages: int = 6, dpi: int = 150):
 
45
 
46
  if ext == ".pdf":
47
  images = pdf_to_images(path, max_pages=max_pages)
48
+ items = [f"page_{i+1}" for i in range(len(images))]
49
  else:
50
  images = [Image.open(path).convert("RGB")]
51
+ items = ["image"]
52
 
53
  rows = []
54
+ agg = {} # sum scores by label across pages
 
55
 
56
+ for item, img in zip(items, images):
57
  preds = clf(img, top_k=top_k)
58
  for p in preds:
59
+ lab = p["label"]
60
+ sc = float(p["score"])
61
+ rows.append({"item": item, "label": lab, "score": sc})
62
+ agg[lab] = agg.get(lab, 0.0) + sc
63
 
64
+ per_item = (
65
+ pd.DataFrame(rows)
66
+ .sort_values(["item", "score"], ascending=[True, False])
67
+ .reset_index(drop=True)
68
+ )
69
 
70
  agg_df = (
71
  pd.DataFrame([{"label": k, "score_sum": v} for k, v in agg.items()])
 
74
  .reset_index(drop=True)
75
  )
76
 
77
+ meta = pd.DataFrame([{
78
+ "backend": ACTIVE_BACKEND,
79
+ "model_id": ACTIVE_MODEL_ID,
80
+ "torch_cuda_available": torch.cuda.is_available(),
81
+ "force_cpu": FORCE_CPU,
82
+ }])
83
+
84
+ return meta, per_item, agg_df
85
 
86
# --- Gradio UI -------------------------------------------------------------
# Inputs / outputs are built as named lists first so the Interface call below
# stays readable.
_inputs = [
    gr.File(label="Upload PDF / PNG / JPG"),
    gr.Slider(1, 50, value=6, step=1, label="Max PDF pages"),
    gr.Slider(1, 20, value=5, step=1, label="Top-K labels"),
]
_outputs = [
    gr.Dataframe(label="Runtime (device/model)"),
    gr.Dataframe(label="Per-page / per-image predictions"),
    gr.Dataframe(label="Aggregated across pages (sum of scores)"),
]

demo = gr.Interface(
    fn=run_infer,
    inputs=_inputs,
    outputs=_outputs,
    title="Document Type Classifier (GPU-first, CPU fallback)",
    description=(
        "GPU model if available; otherwise CPU model. "
        "Set GPU_MODEL_ID / CPU_MODEL_ID / FORCE_CPU=1 as Space variables."
    ),
)

if __name__ == "__main__":
    # Launch the Gradio server when executed as a script.
    demo.launch()