Tamanna Alam commited on
Commit
6548988
·
1 Parent(s): d00ff58

Add Gradio app

Browse files
Files changed (6) hide show
  1. README.md +22 -7
  2. app.py +87 -0
  3. biz/gemini.py +49 -0
  4. biz/segmentation.py +80 -0
  5. biz/utils.py +43 -0
  6. requirements.txt +7 -0
README.md CHANGED
@@ -1,12 +1,27 @@
1
  ---
2
- title: Bizcards Extractor
3
- emoji: 🏃
4
- colorFrom: pink
5
- colorTo: indigo
6
  sdk: gradio
7
- sdk_version: 5.47.2
8
  app_file: app.py
9
- pinned: false
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: BizCards Extractor
3
+ emoji: 💼
4
+ colorFrom: indigo
5
+ colorTo: blue
6
  sdk: gradio
7
+ sdk_version: 4.31.0
8
  app_file: app.py
9
+ pinned: true
10
+ license: mit
11
  ---
12
 
13
+ Upload a photo with one or multiple Japanese business cards → **Segment** (draw boxes) → **Extract** (Gemini 2.5) → **Download Excel**.
14
+
15
+ ### How to use
16
+ 1. Upload an image.
17
+ 2. Click **Segment**.
18
+ 3. Click **Extract** (table fills with parsed fields).
19
+ 4. Download the Excel file from **Download Excel**.
20
+
21
+ ### Config
22
+ - Set `GOOGLE_API_KEY` in **Settings → Variables & secrets**.
23
+ - Enable **Allow internet** in Space settings.
24
+
25
+ ### Notes
26
+ - Supports single or multi-card photos, mixed orientations.
27
+ - Data isn’t persisted; refresh to clear.
app.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io, json, os
2
+ import numpy as np
3
+ from PIL import Image
4
+ import gradio as gr
5
+ import cv2
6
+ import pandas as pd
7
+
8
+ from biz.segmentation import segment_cards
9
+ from biz.gemini import extract_from_crop
10
+ from biz.utils import crop_to_png_bytes, overlay_boxes, to_excel_file
11
+
12
+ # --- helpers ---
13
def np_to_pil(img_np) -> Image.Image:
    """Coerce a numpy array (or an existing PIL image) to an RGB PIL image."""
    if not isinstance(img_np, Image.Image):
        img_np = Image.fromarray(img_np)
    return img_np.convert("RGB")
16
+
17
def pil_to_bgr(pil: Image.Image):
    """Convert an RGB PIL image to an OpenCV-style BGR numpy array."""
    rgb_array = np.array(pil)
    return cv2.cvtColor(rgb_array, cv2.COLOR_RGB2BGR)
19
+
20
+ # --- Gradio functions ---
21
def do_segment(image_np):
    """Segment cards in the uploaded image and draw a box overlay.

    Returns values for the Gradio wiring: (overlay image, boxes as a JSON
    string, visibility update for the download component, download value).
    """
    if image_np is None:
        # Nothing uploaded: clear preview and boxes, hide the download file.
        return None, "[]", gr.update(visible=False), None
    pil_img = np_to_pil(image_np)
    boxes, _w, _h = segment_cards(pil_to_bgr(pil_img))
    preview = overlay_boxes(pil_img, boxes)
    boxes_json = json.dumps(boxes, ensure_ascii=False)
    return preview, boxes_json, gr.update(visible=True), None
29
+
30
def do_extract(image_np, boxes_json):
    """Crop each segmented box, run Gemini extraction, and build outputs.

    Returns (DataFrame of per-card fields, path to the generated Excel file).
    """
    if image_np is None or not boxes_json:
        return pd.DataFrame(), None
    pil_img = np_to_pil(image_np)
    try:
        boxes = json.loads(boxes_json)
    except Exception:
        # Malformed hidden-state JSON: treat as "no boxes".
        boxes = []
    cards = []
    for box in boxes:
        png = crop_to_png_bytes(pil_img, box["x"], box["y"], box["w"], box["h"])
        fields = extract_from_crop(png, source_name="upload")
        fields["box_id"] = box["id"]
        cards.append(fields)
    return pd.DataFrame(cards), to_excel_file(cards)
47
+
48
def clear_all():
    """Reset every UI component to its initial empty state."""
    hidden_dl = gr.update(visible=False)
    return None, None, "[]", hidden_dl, pd.DataFrame(), None
50
+
51
+ # --- UI ---
52
# Gradio UI layout and event wiring.
with gr.Blocks(title="BizCards Extractor (Gradio)") as demo:
    gr.Markdown("## 💼 BizCards Extractor\nUpload → **Segment** → **Extract** → **Download Excel**")

    with gr.Row():
        with gr.Column(scale=3):
            # Input side: uploaded photo plus the three action buttons.
            in_img = gr.Image(type="numpy", label="Upload single or multi-card photo")
            with gr.Row():
                btn_seg = gr.Button("Segment", variant="primary")
                btn_ext = gr.Button("Extract", variant="secondary")
                btn_clear = gr.Button("Clear")
        with gr.Column(scale=2):
            # Output side: box overlay preview, parsed-field table, Excel download.
            out_img = gr.Image(label="Segmented preview (boxes)", interactive=False)
            out_table = gr.Dataframe(
                headers=["box_id","company","person_romaji","person_kanji","person_kana",
                "title","department","email","phone","website","address_jp","notes","source_name"],
                wrap=True, height=350
            )
            dl = gr.File(label="Download Excel", visible=False)

    # hidden state for boxes in JSON (produced by do_segment, consumed by do_extract)
    boxes_state = gr.Textbox(label="boxes_json (debug)", visible=False, value="[]")

    # wiring
    # NOTE(review): `dl` appears twice in the outputs of btn_seg and btn_clear.
    # The handlers return matching arities (do_segment: 4 values, clear_all: 6),
    # but duplicate output components are fragile across Gradio versions —
    # confirm this is intended; a single gr.update(visible=..., value=...) per
    # component would be safer.
    btn_seg.click(fn=do_segment, inputs=[in_img],
                  outputs=[out_img, boxes_state, dl, dl])
    btn_ext.click(fn=do_extract, inputs=[in_img, boxes_state],
                  outputs=[out_table, dl])
    btn_clear.click(fn=clear_all, inputs=[],
                    outputs=[in_img, out_img, boxes_state, dl, out_table, dl])

    # show a warning if key missing (checked once at app startup, not per request)
    if not os.getenv("GOOGLE_API_KEY") and not os.getenv("GOOGLE_GENAI_USE_VERTEXAI"):
        gr.Warning("GOOGLE_API_KEY is not set. Add it in Space → Settings → Variables & secrets.")

if __name__ == "__main__":
    demo.queue(max_size=16).launch()
biz/gemini.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, json
2
+ from typing import Dict
3
+ from google import genai
4
+ from google.genai import types
5
+
6
# Prompt sent alongside every cropped card image. It instructs the model to
# reply with a single strict-JSON object only (null / [] for unknown fields);
# extract_from_crop relies on that shape when parsing the response.
SYSTEM_INSTRUCTIONS = """
You read a cropped Japanese business card image and return strict JSON fields.

Return ONLY this JSON:
{
"person_kanji": "...", "person_kana": "...", "person_romaji": "...",
"title": "...", "department": "...", "company": "...",
"email": ["..."], "phone": ["..."],
"website": "...", "address_jp": "...", "notes": "..."
}
Use null / [] for unknowns. Romaji: family-name LAST. No extra text.
"""
18
+
19
def make_client():
    """Build a google-genai client from environment variables.

    Uses Vertex AI when GOOGLE_GENAI_USE_VERTEXAI is truthy (then
    GOOGLE_CLOUD_PROJECT is required, GOOGLE_CLOUD_LOCATION optional);
    otherwise falls back to the GOOGLE_API_KEY path.

    Raises:
        RuntimeError: if the required environment configuration is missing.
    """
    vertex_flag = os.getenv("GOOGLE_GENAI_USE_VERTEXAI", "false").lower()
    if vertex_flag in ("1", "true", "yes"):
        project = os.getenv("GOOGLE_CLOUD_PROJECT")
        if not project:
            raise RuntimeError("Set GOOGLE_CLOUD_PROJECT or use GOOGLE_API_KEY.")
        location = os.getenv("GOOGLE_CLOUD_LOCATION", "us-central1")
        return genai.Client(vertexai=True, project=project, location=location)
    api_key = os.getenv("GOOGLE_API_KEY")
    if not api_key:
        raise RuntimeError("Missing GOOGLE_API_KEY.")
    return genai.Client(api_key=api_key)
31
+
32
def extract_from_crop(image_bytes: bytes, source_name: str) -> Dict:
    """Send one cropped card image to Gemini and parse the JSON reply.

    Args:
        image_bytes: PNG-encoded crop of a single business card.
        source_name: Label recorded in the result under "source_name".

    Returns:
        Dict of extracted fields (empty apart from "source_name" when the
        model reply cannot be parsed as a JSON object).
    """
    client = make_client()
    img_part = types.Part.from_bytes(data=image_bytes, mime_type="image/png")
    resp = client.models.generate_content(
        model="gemini-2.5-flash",
        contents=[types.Content(role="user", parts=[
            # Keyword argument: Part.from_text is keyword-only in newer SDK
            # releases, and text= also works on older ones.
            types.Part.from_text(text=SYSTEM_INSTRUCTIONS), img_part
        ])],
        config=types.GenerateContentConfig(
            temperature=0.2, response_mime_type="application/json"
        ),
    )
    try:
        # resp.text can be None (e.g. blocked response); "" fails json.loads
        # and falls through to the empty-dict default.
        data = json.loads(resp.text or "")
    except (TypeError, ValueError):
        data = {}
    # Despite the prompt, the model may return a JSON array or scalar;
    # normalize to a dict so the assignment below cannot raise.
    if not isinstance(data, dict):
        data = {}
    data["source_name"] = source_name
    return data
biz/segmentation.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2, numpy as np
2
+ from typing import List, Tuple, TypedDict
3
+ import logging
4
+ log = logging.getLogger(__name__)
5
+
6
class Box(TypedDict):
    """One detected card: axis-aligned box in original-image pixels."""
    id: str
    x: int
    y: int
    w: int
    h: int
    angle: float


# Segmentation tuning constants.
LONG_SIDE = 1800          # working-resolution cap for the longer image side
MIN_AREA_FRAC = 0.006     # reject contours smaller than this fraction of the image
MAX_AREA_FRAC = 0.98      # reject contours that are essentially the whole image
AR_MIN, AR_MAX = 0.55, 2.80   # accepted aspect-ratio range for candidate rects
WHITENESS_MIN = 120       # minimum mean gray level inside a candidate card
14
+
15
def _resize_keep(img, long_side=LONG_SIDE):
    """Downscale `img` so its longer side is at most `long_side`.

    Never upscales. Returns (possibly-resized image, applied scale factor);
    the factor is 1.0 when no resize happened.
    """
    height, width = img.shape[:2]
    scale = long_side / max(height, width)
    if scale >= 1.0:
        return img, 1.0
    new_size = (int(width * scale), int(height * scale))
    return cv2.resize(img, new_size, interpolation=cv2.INTER_AREA), scale
23
+
24
+ def _iou(a,b):
25
+ xa1,ya1,xa2,ya2=a; xb1,yb1,xb2,yb2=b
26
+ inter = max(0,min(xa2,xb2)-max(xa1,xb1)) * max(0,min(ya2,yb2)-max(ya1,yb1))
27
+ if inter == 0: return 0.0
28
+ area_a=(xa2-xa1)*(ya2-ya1); area_b=(xb2-xb1)*(yb2-yb1)
29
+ return inter / (area_a+area_b-inter+1e-6)
30
+
31
def segment_cards(image_bgr) -> Tuple[List[Box], int, int]:
    """Detect card-like bright rectangles in a BGR photo.

    Returns (boxes, W0, H0): axis-aligned boxes in ORIGINAL image pixel
    coordinates (sorted top-to-bottom then left-to-right) plus the original
    width/height. Falls back to a single near-full-image box when nothing
    passes the filters.
    """
    H0, W0 = image_bgr.shape[:2]
    # Work at a bounded resolution for speed; `back` maps work-px -> original-px.
    work, s = _resize_keep(image_bgr, LONG_SIDE)
    H, W = work.shape[:2]; back = 1.0/s

    # Binarize: edge-preserving smoothing, Otsu threshold, then morphology —
    # open removes speckle, close fills gaps inside card regions.
    gray = cv2.cvtColor(work, cv2.COLOR_BGR2GRAY)
    gray = cv2.bilateralFilter(gray, 7, 50, 50)
    _, bin_ = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    bin_ = cv2.morphologyEx(bin_, cv2.MORPH_OPEN, cv2.getStructuringElement(cv2.MORPH_RECT,(3,3)),1)
    bin_ = cv2.morphologyEx(bin_, cv2.MORPH_CLOSE, cv2.getStructuringElement(cv2.MORPH_RECT,(9,9)),1)

    cnts,_ = cv2.findContours(bin_, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    min_area = MIN_AREA_FRAC * (W*H)
    max_area = MAX_AREA_FRAC * (W*H)

    cand=[]
    for c in cnts:
        a=cv2.contourArea(c)
        # Area gate: drop speckle and near-whole-image contours.
        if a<min_area or a>max_area: continue
        r=cv2.minAreaRect(c); (cx,cy),(rw,rh),ang=r
        if rw<10 or rh<10: continue
        # Aspect ratio of the rotated rect; max/min so ar >= 1 by construction.
        # NOTE(review): because ar >= 1, the `ar < AR_MIN` test (AR_MIN=0.55)
        # can never fire — only AR_MAX effectively filters. Confirm intent.
        ar=max(rw,rh)/max(1.0,min(rw,rh))
        if ar<AR_MIN or ar>AR_MAX: continue
        # Brightness gate: cards are expected to be predominantly light;
        # mean gray inside the rotated rect must clear WHITENESS_MIN.
        pts=cv2.boxPoints(r).astype(int)
        m=np.zeros((H,W),np.uint8); cv2.drawContours(m,[pts],-1,255,-1)
        if cv2.mean(gray, mask=m)[0] < WHITENESS_MIN: continue
        # Axis-aligned bounds of the rotated rect, mapped back to original px
        # and clamped to the image.
        xs,ys=pts[:,0],pts[:,1]
        x1,y1,x2,y2=xs.min(),ys.min(),xs.max(),ys.max()
        x,y,w,h=int(x1*back),int(y1*back),int((x2-x1)*back),int((y2-y1)*back)
        x=max(0,x); y=max(0,y); w=min(W0-x,w); h=min(H0-y,h)
        if w*h<=0: continue
        cand.append((x,y,w,h,float(ang)))

    # Greedy non-max suppression: keep largest-area boxes first, drop any
    # candidate overlapping a kept box with IoU >= 0.20.
    cand.sort(key=lambda z:z[2]*z[3], reverse=True)
    picked=[]
    for x,y,w,h,ang in cand:
        rect=(x,y,x+w,y+h)
        if all(_iou(rect,(px,py,px+pw,py+ph))<0.20 for px,py,pw,ph,_ in picked):
            picked.append((x,y,w,h,ang))

    if not picked:
        # Fallback: assume the whole photo is one card, minus a 3% margin.
        m=int(min(W0,H0)*0.03)
        x,y=m,m; w,h=max(1,W0-2*m),max(1,H0-2*m)
        log.info("No contours -> fallback to full-image box.")
        boxes=[{"id":"card-0","x":x,"y":y,"w":w,"h":h,"angle":0.0}]
    else:
        boxes=[{"id":f"card-{i}","x":x,"y":y,"w":w,"h":h,"angle":ang}
        for i,(x,y,w,h,ang) in enumerate(picked)]
    # Reading order: top-to-bottom, then left-to-right.
    boxes.sort(key=lambda b:(b["y"], b["x"]))
    return boxes, W0, H0
biz/utils.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ from typing import List, Dict
3
+ from PIL import Image, ImageDraw
4
+ import pandas as pd
5
+ import uuid, os
6
+
7
def crop_to_png_bytes(img_pil: Image.Image, x:int,y:int,w:int,h:int) -> bytes:
    """Crop the (x, y, w, h) region from the image and return it PNG-encoded."""
    region = img_pil.crop((x, y, x + w, y + h))
    buf = io.BytesIO()
    region.save(buf, format="PNG")
    return buf.getvalue()
12
+
13
def overlay_boxes(img_pil: Image.Image, boxes: List[Dict]) -> Image.Image:
    """Return a copy of the image with each box drawn as a purple rectangle."""
    annotated = img_pil.copy()
    draw = ImageDraw.Draw(annotated)
    for box in boxes:
        left, top = box["x"], box["y"]
        right, bottom = left + box["w"], top + box["h"]
        draw.rectangle([left, top, right, bottom], outline=(124, 58, 237), width=6)
    return annotated
20
+
21
def to_excel_file(cards: List[Dict]) -> str:
    """Write extracted card fields to a temporary .xlsx and return its path.

    Args:
        cards: One dict of extracted fields per card (Gemini output).

    Returns:
        Filesystem path of the generated Excel file (requires openpyxl).

    Fix: the Gemini prompt allows null for unknown fields, so "email"/"phone"
    may be present with value None (or a bare string); the original
    ", ".join(c.get("email", [])) crashed on both shapes.
    """
    import tempfile  # local import: keeps the module's top-level imports unchanged

    def _join(value) -> str:
        # Normalize None / str / list-of-str into one comma-separated string.
        if value is None:
            return ""
        if isinstance(value, str):
            return value
        return ", ".join(str(v) for v in value)

    rows = [{
        "box_id": c.get("box_id"),
        "company": c.get("company"),
        "person_kanji": c.get("person_kanji"),
        "person_kana": c.get("person_kana"),
        "person_romaji": c.get("person_romaji"),
        "title": c.get("title"),
        "department": c.get("department"),
        "email": _join(c.get("email")),
        "phone": _join(c.get("phone")),
        "website": c.get("website"),
        "address_jp": c.get("address_jp"),
        "notes": c.get("notes"),
        "source_name": c.get("source_name"),
    } for c in cards]
    df = pd.DataFrame(rows)
    # tempfile.gettempdir() instead of hard-coded "/tmp" — portable across OSes.
    path = os.path.join(tempfile.gettempdir(), f"contacts-{uuid.uuid4().hex[:8]}.xlsx")
    with pd.ExcelWriter(path, engine="openpyxl") as w:
        df.to_excel(w, index=False, sheet_name="Contacts")
    return path
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ gradio>=4.31.0
2
+ opencv-python-headless==4.10.0.84
3
+ pillow==10.4.0
4
+ pandas==2.2.2
5
+ openpyxl==3.1.5
6
+ google-genai==0.3.0
7
+ numpy>=1.26