Tamanna Alam commited on
Commit
6548988
·
1 Parent(s): d00ff58

Add Gradio app

Browse files
Files changed (6) hide show
  1. README.md +22 -7
  2. app.py +87 -0
  3. biz/gemini.py +49 -0
  4. biz/segmentation.py +80 -0
  5. biz/utils.py +43 -0
  6. requirements.txt +7 -0
README.md CHANGED
@@ -1,12 +1,27 @@
1
  ---
2
- title: Bizcards Extractor
3
- emoji: 🏃
4
- colorFrom: pink
5
- colorTo: indigo
6
  sdk: gradio
7
- sdk_version: 5.47.2
8
  app_file: app.py
9
- pinned: false
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: BizCards Extractor
3
+ emoji: 💼
4
+ colorFrom: indigo
5
+ colorTo: blue
6
  sdk: gradio
7
+ sdk_version: 4.31.0
8
  app_file: app.py
9
+ pinned: true
10
+ license: mit
11
  ---
12
 
13
+ Upload a photo with one or multiple Japanese business cards → **Segment** (draw boxes) → **Extract** (Gemini 2.5) → **Download Excel**.
14
+
15
+ ### How to use
16
+ 1. Upload an image.
17
+ 2. Click **Segment**.
18
+ 3. Click **Extract** (table fills with parsed fields).
19
+ 4. Download the Excel file from **Download Excel**.
20
+
21
+ ### Config
22
+ - Set `GOOGLE_API_KEY` in **Settings → Variables & secrets**.
23
+ - Enable **Allow internet** in Space settings.
24
+
25
+ ### Notes
26
+ - Supports single or multi-card photos, mixed orientations.
27
+ - Data isn’t persisted; refresh to clear.
app.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io, json, os
2
+ import numpy as np
3
+ from PIL import Image
4
+ import gradio as gr
5
+ import cv2
6
+ import pandas as pd
7
+
8
+ from biz.segmentation import segment_cards
9
+ from biz.gemini import extract_from_crop
10
+ from biz.utils import crop_to_png_bytes, overlay_boxes, to_excel_file
11
+
12
+ # --- helpers ---
13
def np_to_pil(img_np) -> Image.Image:
    """Coerce a numpy array (or an existing PIL image) to an RGB PIL image."""
    if not isinstance(img_np, Image.Image):
        img_np = Image.fromarray(img_np)
    return img_np.convert("RGB")
16
+
17
def pil_to_bgr(pil: Image.Image):
    """Convert an RGB PIL image to an OpenCV-style BGR numpy array."""
    rgb_array = np.array(pil)
    return cv2.cvtColor(rgb_array, cv2.COLOR_RGB2BGR)
19
+
20
+ # --- Gradio functions ---
21
def do_segment(image_np):
    """Segment cards in the uploaded image and draw a box overlay.

    Returns values for the Gradio wiring: (overlay image, boxes as a JSON
    string, visibility update for the download component, download value).
    """
    if image_np is None:
        # Nothing uploaded: clear preview and boxes, hide the download file.
        return None, "[]", gr.update(visible=False), None
    pil_img = np_to_pil(image_np)
    boxes, _w, _h = segment_cards(pil_to_bgr(pil_img))
    preview = overlay_boxes(pil_img, boxes)
    boxes_json = json.dumps(boxes, ensure_ascii=False)
    return preview, boxes_json, gr.update(visible=True), None
29
+
30
def do_extract(image_np, boxes_json):
    """Crop each segmented box, run Gemini extraction, and build outputs.

    Returns (DataFrame of per-card fields, path to the generated Excel file).
    """
    if image_np is None or not boxes_json:
        return pd.DataFrame(), None
    pil_img = np_to_pil(image_np)
    try:
        boxes = json.loads(boxes_json)
    except Exception:
        # Malformed hidden-state JSON: treat as "no boxes".
        boxes = []
    cards = []
    for box in boxes:
        png = crop_to_png_bytes(pil_img, box["x"], box["y"], box["w"], box["h"])
        fields = extract_from_crop(png, source_name="upload")
        fields["box_id"] = box["id"]
        cards.append(fields)
    return pd.DataFrame(cards), to_excel_file(cards)
47
+
48
def clear_all():
    """Reset every UI component to its initial empty state."""
    hidden_dl = gr.update(visible=False)
    return None, None, "[]", hidden_dl, pd.DataFrame(), None
50
+
51
+ # --- UI ---
52
# Gradio UI layout and event wiring.
with gr.Blocks(title="BizCards Extractor (Gradio)") as demo:
    gr.Markdown("## 💼 BizCards Extractor\nUpload → **Segment** → **Extract** → **Download Excel**")

    with gr.Row():
        with gr.Column(scale=3):
            # Input side: uploaded photo plus the three action buttons.
            in_img = gr.Image(type="numpy", label="Upload single or multi-card photo")
            with gr.Row():
                btn_seg = gr.Button("Segment", variant="primary")
                btn_ext = gr.Button("Extract", variant="secondary")
                btn_clear = gr.Button("Clear")
        with gr.Column(scale=2):
            # Output side: box overlay preview, parsed-field table, Excel download.
            out_img = gr.Image(label="Segmented preview (boxes)", interactive=False)
            out_table = gr.Dataframe(
                headers=["box_id","company","person_romaji","person_kanji","person_kana",
                "title","department","email","phone","website","address_jp","notes","source_name"],
                wrap=True, height=350
            )
            dl = gr.File(label="Download Excel", visible=False)

    # hidden state for boxes in JSON (produced by do_segment, consumed by do_extract)
    boxes_state = gr.Textbox(label="boxes_json (debug)", visible=False, value="[]")

    # wiring
    # NOTE(review): `dl` appears twice in the outputs of btn_seg and btn_clear.
    # The handlers return matching arities (do_segment: 4 values, clear_all: 6),
    # but duplicate output components are fragile across Gradio versions —
    # confirm this is intended; a single gr.update(visible=..., value=...) per
    # component would be safer.
    btn_seg.click(fn=do_segment, inputs=[in_img],
                  outputs=[out_img, boxes_state, dl, dl])
    btn_ext.click(fn=do_extract, inputs=[in_img, boxes_state],
                  outputs=[out_table, dl])
    btn_clear.click(fn=clear_all, inputs=[],
                    outputs=[in_img, out_img, boxes_state, dl, out_table, dl])

    # show a warning if key missing (checked once at app startup, not per request)
    if not os.getenv("GOOGLE_API_KEY") and not os.getenv("GOOGLE_GENAI_USE_VERTEXAI"):
        gr.Warning("GOOGLE_API_KEY is not set. Add it in Space → Settings → Variables & secrets.")

if __name__ == "__main__":
    demo.queue(max_size=16).launch()
biz/gemini.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, json
2
+ from typing import Dict
3
+ from google import genai
4
+ from google.genai import types
5
+
6
# Prompt sent alongside every cropped card image. It instructs the model to
# reply with a single strict-JSON object only (null / [] for unknown fields);
# extract_from_crop relies on that shape when parsing the response.
SYSTEM_INSTRUCTIONS = """
You read a cropped Japanese business card image and return strict JSON fields.

Return ONLY this JSON:
{
"person_kanji": "...", "person_kana": "...", "person_romaji": "...",
"title": "...", "department": "...", "company": "...",
"email": ["..."], "phone": ["..."],
"website": "...", "address_jp": "...", "notes": "..."
}
Use null / [] for unknowns. Romaji: family-name LAST. No extra text.
"""
18
+
19
def make_client():
    """Build a google-genai client from environment variables.

    Uses Vertex AI when GOOGLE_GENAI_USE_VERTEXAI is truthy (then
    GOOGLE_CLOUD_PROJECT is required, GOOGLE_CLOUD_LOCATION optional);
    otherwise falls back to the GOOGLE_API_KEY path.

    Raises:
        RuntimeError: if the required environment configuration is missing.
    """
    vertex_flag = os.getenv("GOOGLE_GENAI_USE_VERTEXAI", "false").lower()
    if vertex_flag in ("1", "true", "yes"):
        project = os.getenv("GOOGLE_CLOUD_PROJECT")
        if not project:
            raise RuntimeError("Set GOOGLE_CLOUD_PROJECT or use GOOGLE_API_KEY.")
        location = os.getenv("GOOGLE_CLOUD_LOCATION", "us-central1")
        return genai.Client(vertexai=True, project=project, location=location)
    api_key = os.getenv("GOOGLE_API_KEY")
    if not api_key:
        raise RuntimeError("Missing GOOGLE_API_KEY.")
    return genai.Client(api_key=api_key)
31
+
32
def extract_from_crop(image_bytes: bytes, source_name: str) -> Dict:
    """Send one cropped card image to Gemini and parse the JSON reply.

    Args:
        image_bytes: PNG-encoded crop of a single business card.
        source_name: Label recorded in the result under "source_name".

    Returns:
        Dict of extracted fields (empty apart from "source_name" when the
        model reply cannot be parsed as a JSON object).
    """
    client = make_client()
    img_part = types.Part.from_bytes(data=image_bytes, mime_type="image/png")
    resp = client.models.generate_content(
        model="gemini-2.5-flash",
        contents=[types.Content(role="user", parts=[
            # Keyword argument: Part.from_text is keyword-only in newer SDK
            # releases, and text= also works on older ones.
            types.Part.from_text(text=SYSTEM_INSTRUCTIONS), img_part
        ])],
        config=types.GenerateContentConfig(
            temperature=0.2, response_mime_type="application/json"
        ),
    )
    try:
        # resp.text can be None (e.g. blocked response); "" fails json.loads
        # and falls through to the empty-dict default.
        data = json.loads(resp.text or "")
    except (TypeError, ValueError):
        data = {}
    # Despite the prompt, the model may return a JSON array or scalar;
    # normalize to a dict so the assignment below cannot raise.
    if not isinstance(data, dict):
        data = {}
    data["source_name"] = source_name
    return data
biz/segmentation.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2, numpy as np
2
+ from typing import List, Tuple, TypedDict
3
+ import logging
4
+ log = logging.getLogger(__name__)
5
+
6
class Box(TypedDict):
    """One detected card: axis-aligned box in original-image pixels."""
    id: str
    x: int
    y: int
    w: int
    h: int
    angle: float


# Segmentation tuning constants.
LONG_SIDE = 1800          # working-resolution cap for the longer image side
MIN_AREA_FRAC = 0.006     # reject contours smaller than this fraction of the image
MAX_AREA_FRAC = 0.98      # reject contours that are essentially the whole image
AR_MIN, AR_MAX = 0.55, 2.80   # accepted aspect-ratio range for candidate rects
WHITENESS_MIN = 120       # minimum mean gray level inside a candidate card
14
+
15
def _resize_keep(img, long_side=LONG_SIDE):
    """Downscale `img` so its longer side is at most `long_side`.

    Never upscales. Returns (possibly-resized image, applied scale factor);
    the factor is 1.0 when no resize happened.
    """
    height, width = img.shape[:2]
    scale = long_side / max(height, width)
    if scale >= 1.0:
        return img, 1.0
    new_size = (int(width * scale), int(height * scale))
    return cv2.resize(img, new_size, interpolation=cv2.INTER_AREA), scale
23
+
24
+ def _iou(a,b):
25
+ xa1,ya1,xa2,ya2=a; xb1,yb1,xb2,yb2=b
26
+ inter = max(0,min(xa2,xb2)-max(xa1,xb1)) * max(0,min(ya2,yb2)-max(ya1,yb1))
27
+ if inter == 0: return 0.0
28
+ area_a=(xa2-xa1)*(ya2-ya1); area_b=(xb2-xb1)*(yb2-yb1)
29
+ return inter / (area_a+area_b-inter+1e-6)
30
+
31
def segment_cards(image_bgr) -> Tuple[List[Box], int, int]:
    """Detect card-like bright rectangles in a BGR photo.

    Returns (boxes, W0, H0): axis-aligned boxes in ORIGINAL image pixel
    coordinates (sorted top-to-bottom then left-to-right) plus the original
    width/height. Falls back to a single near-full-image box when nothing
    passes the filters.
    """
    H0, W0 = image_bgr.shape[:2]
    # Work at a bounded resolution for speed; `back` maps work-px -> original-px.
    work, s = _resize_keep(image_bgr, LONG_SIDE)
    H, W = work.shape[:2]; back = 1.0/s

    # Binarize: edge-preserving smoothing, Otsu threshold, then morphology —
    # open removes speckle, close fills gaps inside card regions.
    gray = cv2.cvtColor(work, cv2.COLOR_BGR2GRAY)
    gray = cv2.bilateralFilter(gray, 7, 50, 50)
    _, bin_ = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    bin_ = cv2.morphologyEx(bin_, cv2.MORPH_OPEN, cv2.getStructuringElement(cv2.MORPH_RECT,(3,3)),1)
    bin_ = cv2.morphologyEx(bin_, cv2.MORPH_CLOSE, cv2.getStructuringElement(cv2.MORPH_RECT,(9,9)),1)

    cnts,_ = cv2.findContours(bin_, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    min_area = MIN_AREA_FRAC * (W*H)
    max_area = MAX_AREA_FRAC * (W*H)

    cand=[]
    for c in cnts:
        a=cv2.contourArea(c)
        # Area gate: drop speckle and near-whole-image contours.
        if a<min_area or a>max_area: continue
        r=cv2.minAreaRect(c); (cx,cy),(rw,rh),ang=r
        if rw<10 or rh<10: continue
        # Aspect ratio of the rotated rect; max/min so ar >= 1 by construction.
        # NOTE(review): because ar >= 1, the `ar < AR_MIN` test (AR_MIN=0.55)
        # can never fire — only AR_MAX effectively filters. Confirm intent.
        ar=max(rw,rh)/max(1.0,min(rw,rh))
        if ar<AR_MIN or ar>AR_MAX: continue
        # Brightness gate: cards are expected to be predominantly light;
        # mean gray inside the rotated rect must clear WHITENESS_MIN.
        pts=cv2.boxPoints(r).astype(int)
        m=np.zeros((H,W),np.uint8); cv2.drawContours(m,[pts],-1,255,-1)
        if cv2.mean(gray, mask=m)[0] < WHITENESS_MIN: continue
        # Axis-aligned bounds of the rotated rect, mapped back to original px
        # and clamped to the image.
        xs,ys=pts[:,0],pts[:,1]
        x1,y1,x2,y2=xs.min(),ys.min(),xs.max(),ys.max()
        x,y,w,h=int(x1*back),int(y1*back),int((x2-x1)*back),int((y2-y1)*back)
        x=max(0,x); y=max(0,y); w=min(W0-x,w); h=min(H0-y,h)
        if w*h<=0: continue
        cand.append((x,y,w,h,float(ang)))

    # Greedy non-max suppression: keep largest-area boxes first, drop any
    # candidate overlapping a kept box with IoU >= 0.20.
    cand.sort(key=lambda z:z[2]*z[3], reverse=True)
    picked=[]
    for x,y,w,h,ang in cand:
        rect=(x,y,x+w,y+h)
        if all(_iou(rect,(px,py,px+pw,py+ph))<0.20 for px,py,pw,ph,_ in picked):
            picked.append((x,y,w,h,ang))

    if not picked:
        # Fallback: assume the whole photo is one card, minus a 3% margin.
        m=int(min(W0,H0)*0.03)
        x,y=m,m; w,h=max(1,W0-2*m),max(1,H0-2*m)
        log.info("No contours -> fallback to full-image box.")
        boxes=[{"id":"card-0","x":x,"y":y,"w":w,"h":h,"angle":0.0}]
    else:
        boxes=[{"id":f"card-{i}","x":x,"y":y,"w":w,"h":h,"angle":ang}
        for i,(x,y,w,h,ang) in enumerate(picked)]
    # Reading order: top-to-bottom, then left-to-right.
    boxes.sort(key=lambda b:(b["y"], b["x"]))
    return boxes, W0, H0
biz/utils.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ from typing import List, Dict
3
+ from PIL import Image, ImageDraw
4
+ import pandas as pd
5
+ import uuid, os
6
+
7
def crop_to_png_bytes(img_pil: Image.Image, x:int,y:int,w:int,h:int) -> bytes:
    """Crop the (x, y, w, h) region from the image and return it PNG-encoded."""
    region = img_pil.crop((x, y, x + w, y + h))
    buf = io.BytesIO()
    region.save(buf, format="PNG")
    return buf.getvalue()
12
+
13
def overlay_boxes(img_pil: Image.Image, boxes: List[Dict]) -> Image.Image:
    """Return a copy of the image with each box drawn as a purple rectangle."""
    annotated = img_pil.copy()
    draw = ImageDraw.Draw(annotated)
    for box in boxes:
        left, top = box["x"], box["y"]
        right, bottom = left + box["w"], top + box["h"]
        draw.rectangle([left, top, right, bottom], outline=(124, 58, 237), width=6)
    return annotated
20
+
21
def to_excel_file(cards: List[Dict]) -> str:
    """Write extracted card fields to a temporary .xlsx and return its path.

    Args:
        cards: One dict of extracted fields per card (Gemini output).

    Returns:
        Filesystem path of the generated Excel file (requires openpyxl).

    Fix: the Gemini prompt allows null for unknown fields, so "email"/"phone"
    may be present with value None (or a bare string); the original
    ", ".join(c.get("email", [])) crashed on both shapes.
    """
    import tempfile  # local import: keeps the module's top-level imports unchanged

    def _join(value) -> str:
        # Normalize None / str / list-of-str into one comma-separated string.
        if value is None:
            return ""
        if isinstance(value, str):
            return value
        return ", ".join(str(v) for v in value)

    rows = [{
        "box_id": c.get("box_id"),
        "company": c.get("company"),
        "person_kanji": c.get("person_kanji"),
        "person_kana": c.get("person_kana"),
        "person_romaji": c.get("person_romaji"),
        "title": c.get("title"),
        "department": c.get("department"),
        "email": _join(c.get("email")),
        "phone": _join(c.get("phone")),
        "website": c.get("website"),
        "address_jp": c.get("address_jp"),
        "notes": c.get("notes"),
        "source_name": c.get("source_name"),
    } for c in cards]
    df = pd.DataFrame(rows)
    # tempfile.gettempdir() instead of hard-coded "/tmp" — portable across OSes.
    path = os.path.join(tempfile.gettempdir(), f"contacts-{uuid.uuid4().hex[:8]}.xlsx")
    with pd.ExcelWriter(path, engine="openpyxl") as w:
        df.to_excel(w, index=False, sheet_name="Contacts")
    return path
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ gradio>=4.31.0
2
+ opencv-python-headless==4.10.0.84
3
+ pillow==10.4.0
4
+ pandas==2.2.2
5
+ openpyxl==3.1.5
6
+ google-genai==0.3.0
7
+ numpy>=1.26