rgr4y committed on
Commit
4d921db
·
1 Parent(s): 9c5a20a

CPU fallback

Browse files
Files changed (1) hide show
  1. app.py +47 -19
app.py CHANGED
@@ -3,15 +3,27 @@ import gradio as gr
3
  import pandas as pd
4
  from PIL import Image
5
  import fitz # PyMuPDF
 
6
  from transformers import pipeline
7
 
8
- # Pick a lightweight doc classifier. Swap to your preferred HF model.
9
- MODEL_ID = os.getenv("MODEL_ID", "HAMMALE/vit-tiny-classifier-rvlcdip")
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  clf = pipeline(
12
  task="image-classification",
13
- model=MODEL_ID,
14
- device=0 if os.getenv("CUDA_VISIBLE_DEVICES") not in (None, "", "-1") else -1,
15
  )
16
 
17
  def pdf_to_images(pdf_path: str, max_pages: int = 6, dpi: int = 150):
@@ -33,22 +45,27 @@ def run_infer(file_obj, max_pages: int = 6, top_k: int = 5):
33
 
34
  if ext == ".pdf":
35
  images = pdf_to_images(path, max_pages=max_pages)
36
- page_labels = [f"page_{i+1}" for i in range(len(images))]
37
  else:
38
  images = [Image.open(path).convert("RGB")]
39
- page_labels = ["image"]
40
 
41
  rows = []
42
- # Aggregate by summing scores per label across pages (simple + robust)
43
- agg = {}
44
 
45
- for label, img in zip(page_labels, images):
46
  preds = clf(img, top_k=top_k)
47
  for p in preds:
48
- rows.append({"item": label, "label": p["label"], "score": float(p["score"])})
49
- agg[p["label"]] = agg.get(p["label"], 0.0) + float(p["score"])
 
 
50
 
51
- per_page = pd.DataFrame(rows).sort_values(["item", "score"], ascending=[True, False])
 
 
 
 
52
 
53
  agg_df = (
54
  pd.DataFrame([{"label": k, "score_sum": v} for k, v in agg.items()])
@@ -57,22 +74,33 @@ def run_infer(file_obj, max_pages: int = 6, top_k: int = 5):
57
  .reset_index(drop=True)
58
  )
59
 
60
- return per_page, agg_df
 
 
 
 
 
 
 
61
 
62
  demo = gr.Interface(
63
  fn=run_infer,
64
  inputs=[
65
- gr.File(label="Upload PDF/PNG/JPG"),
66
- gr.Slider(1, 30, value=6, step=1, label="Max PDF pages"),
67
  gr.Slider(1, 20, value=5, step=1, label="Top-K labels"),
68
  ],
69
  outputs=[
70
- gr.Dataframe(label="Per-page predictions"),
 
71
  gr.Dataframe(label="Aggregated across pages (sum of scores)"),
72
  ],
73
- title="Document Classifier (PDF/PNG)",
74
- description=f"Model: {MODEL_ID}. Upload a PDF or image to classify document type.",
 
 
 
75
  )
76
 
77
  if __name__ == "__main__":
78
- demo.launch()
 
3
  import pandas as pd
4
  from PIL import Image
5
  import fitz # PyMuPDF
6
+ import torch
7
  from transformers import pipeline
8
 
9
# ---------------------------------------------------------------------------
# Device / model selection: prefer the larger GPU model when CUDA is usable,
# otherwise fall back to a lightweight CPU model.
# ---------------------------------------------------------------------------
GPU_MODEL_ID = os.getenv("GPU_MODEL_ID", "microsoft/dit-base-finetuned-rvlcdip")
CPU_MODEL_ID = os.getenv("CPU_MODEL_ID", "HAMMALE/vit-tiny-classifier-rvlcdip")

# Optional override: set FORCE_CPU=1 in Space variables to force CPU inference
# even when CUDA is available. Common truthy spellings are accepted as well,
# so FORCE_CPU=true / yes / on no longer silently fall through to the GPU.
FORCE_CPU = os.getenv("FORCE_CPU", "0").strip().lower() in {"1", "true", "yes", "on"}


def pick_device_and_model() -> tuple[int, str, str]:
    """Choose the inference device and the matching model checkpoint.

    Returns:
        A ``(device, model_id, backend)`` triple where ``device`` is the
        ``transformers`` pipeline device index (``0`` = first GPU, ``-1`` =
        CPU), ``model_id`` is the Hugging Face model to load, and ``backend``
        is ``"cuda"`` or ``"cpu"`` (used for the runtime-info table in the UI).
    """
    if torch.cuda.is_available() and not FORCE_CPU:
        return 0, GPU_MODEL_ID, "cuda"
    return -1, CPU_MODEL_ID, "cpu"


DEVICE, ACTIVE_MODEL_ID, ACTIVE_BACKEND = pick_device_and_model()
22
 
23
# Build the image-classification pipeline once at startup; DEVICE and
# ACTIVE_MODEL_ID come from pick_device_and_model() above.
clf = pipeline(
    "image-classification",
    model=ACTIVE_MODEL_ID,
    device=DEVICE,
)
28
 
29
  def pdf_to_images(pdf_path: str, max_pages: int = 6, dpi: int = 150):
 
45
 
46
  if ext == ".pdf":
47
  images = pdf_to_images(path, max_pages=max_pages)
48
+ items = [f"page_{i+1}" for i in range(len(images))]
49
  else:
50
  images = [Image.open(path).convert("RGB")]
51
+ items = ["image"]
52
 
53
  rows = []
54
+ agg = {} # sum scores by label across pages
 
55
 
56
+ for item, img in zip(items, images):
57
  preds = clf(img, top_k=top_k)
58
  for p in preds:
59
+ lab = p["label"]
60
+ sc = float(p["score"])
61
+ rows.append({"item": item, "label": lab, "score": sc})
62
+ agg[lab] = agg.get(lab, 0.0) + sc
63
 
64
+ per_item = (
65
+ pd.DataFrame(rows)
66
+ .sort_values(["item", "score"], ascending=[True, False])
67
+ .reset_index(drop=True)
68
+ )
69
 
70
  agg_df = (
71
  pd.DataFrame([{"label": k, "score_sum": v} for k, v in agg.items()])
 
74
  .reset_index(drop=True)
75
  )
76
 
77
+ meta = pd.DataFrame([{
78
+ "backend": ACTIVE_BACKEND,
79
+ "model_id": ACTIVE_MODEL_ID,
80
+ "torch_cuda_available": torch.cuda.is_available(),
81
+ "force_cpu": FORCE_CPU,
82
+ }])
83
+
84
+ return meta, per_item, agg_df
85
 
86
# --- Gradio UI -------------------------------------------------------------
# Inputs / outputs are built as named lists first so the Interface call below
# stays readable.
_inputs = [
    gr.File(label="Upload PDF / PNG / JPG"),
    gr.Slider(1, 50, value=6, step=1, label="Max PDF pages"),
    gr.Slider(1, 20, value=5, step=1, label="Top-K labels"),
]
_outputs = [
    gr.Dataframe(label="Runtime (device/model)"),
    gr.Dataframe(label="Per-page / per-image predictions"),
    gr.Dataframe(label="Aggregated across pages (sum of scores)"),
]

demo = gr.Interface(
    fn=run_infer,
    inputs=_inputs,
    outputs=_outputs,
    title="Document Type Classifier (GPU-first, CPU fallback)",
    description=(
        "GPU model if available; otherwise CPU model. "
        "Set GPU_MODEL_ID / CPU_MODEL_ID / FORCE_CPU=1 as Space variables."
    ),
)

if __name__ == "__main__":
    # Launch the Gradio server when executed as a script.
    demo.launch()