Spaces:
Sleeping
Sleeping
Tamanna Alam commited on
Commit ·
6548988
1
Parent(s): d00ff58
Add Gradio app
Browse files- README.md +22 -7
- app.py +87 -0
- biz/gemini.py +49 -0
- biz/segmentation.py +80 -0
- biz/utils.py +43 -0
- requirements.txt +7 -0
README.md
CHANGED
|
@@ -1,12 +1,27 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: gradio
|
| 7 |
-
sdk_version:
|
| 8 |
app_file: app.py
|
| 9 |
-
pinned:
|
|
|
|
| 10 |
---
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: BizCards Extractor
|
| 3 |
+
emoji: 💼
|
| 4 |
+
colorFrom: indigo
|
| 5 |
+
colorTo: blue
|
| 6 |
sdk: gradio
|
| 7 |
+
sdk_version: 4.31.0
|
| 8 |
app_file: app.py
|
| 9 |
+
pinned: true
|
| 10 |
+
license: mit
|
| 11 |
---
|
| 12 |
|
| 13 |
+
Upload a photo of one or more Japanese business cards → **Segment** (draw boxes) → **Extract** (Gemini 2.5) → **Download Excel**.
|
| 14 |
+
|
| 15 |
+
### How to use
|
| 16 |
+
1. Upload an image.
|
| 17 |
+
2. Click **Segment**.
|
| 18 |
+
3. Click **Extract** (table fills with parsed fields).
|
| 19 |
+
4. Download the Excel file from **Download Excel**.
|
| 20 |
+
|
| 21 |
+
### Config
|
| 22 |
+
- Set `GOOGLE_API_KEY` in **Settings → Variables & secrets**.
|
| 23 |
+
- Enable **Allow internet** in Space settings.
|
| 24 |
+
|
| 25 |
+
### Notes
|
| 26 |
+
- Supports single or multi-card photos, mixed orientations.
|
| 27 |
+
- Data isn’t persisted; refresh the page to clear it.
|
app.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import io, json, os
|
| 2 |
+
import numpy as np
|
| 3 |
+
from PIL import Image
|
| 4 |
+
import gradio as gr
|
| 5 |
+
import cv2
|
| 6 |
+
import pandas as pd
|
| 7 |
+
|
| 8 |
+
from biz.segmentation import segment_cards
|
| 9 |
+
from biz.gemini import extract_from_crop
|
| 10 |
+
from biz.utils import crop_to_png_bytes, overlay_boxes, to_excel_file
|
| 11 |
+
|
| 12 |
+
# --- helpers ---
|
| 13 |
+
def np_to_pil(img_np) -> Image.Image:
    """Return ``img_np`` as an RGB PIL image.

    A PIL image is converted to RGB directly; anything else is handed to
    ``Image.fromarray`` first.
    """
    if isinstance(img_np, Image.Image):
        return img_np.convert("RGB")
    return Image.fromarray(img_np).convert("RGB")
|
| 16 |
+
|
| 17 |
+
def pil_to_bgr(pil: Image.Image):
    """Convert a PIL image to a BGR array for OpenCV.

    The image is normalised to RGB first so that inputs in other modes
    (for example RGBA or L) do not break the RGB->BGR conversion.
    """
    rgb = np.array(pil.convert("RGB"))
    return cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
|
| 19 |
+
|
| 20 |
+
# --- Gradio functions ---
|
| 21 |
+
def do_segment(image_np):
    """Detect card boxes in the uploaded image and build the preview.

    Returns a 4-tuple: the image with the boxes drawn on it, the boxes
    serialised as a JSON string, a visibility update, and ``None``.
    With no image, the preview is ``None`` and the JSON is ``"[]"``.
    """
    if image_np is None:
        return None, "[]", gr.update(visible=False), None

    pil = np_to_pil(image_np)
    # segment_cards also reports the image size; it is not needed here.
    boxes, _, _ = segment_cards(pil_to_bgr(pil))
    overlay = overlay_boxes(pil, boxes)
    boxes_json = json.dumps(boxes, ensure_ascii=False)
    return overlay, boxes_json, gr.update(visible=True), None
|
| 29 |
+
|
| 30 |
+
def do_extract(image_np, boxes_json):
    """Run field extraction on every segmented box.

    Args:
        image_np: The uploaded image (array or PIL image), or ``None``.
        boxes_json: JSON string holding a list of box dicts with the keys
            ``id``, ``x``, ``y``, ``w`` and ``h``.

    Returns:
        ``(table, xlsx_path)``: a DataFrame with one row per box and the
        path of the generated Excel file. When there is no image, or the
        JSON does not decode to a non-empty list, an empty DataFrame and
        ``None`` are returned instead.
    """
    if image_np is None or not boxes_json:
        return pd.DataFrame(), None

    try:
        boxes = json.loads(boxes_json)
    except (TypeError, ValueError):  # json.JSONDecodeError is a ValueError
        boxes = []
    if not isinstance(boxes, list) or not boxes:
        # Nothing to extract: do not offer an empty workbook for download.
        return pd.DataFrame(), None

    pil = np_to_pil(image_np)
    cards = []
    for b in boxes:
        crop = crop_to_png_bytes(pil, b["x"], b["y"], b["w"], b["h"])
        fields = extract_from_crop(crop, source_name="upload")
        fields["box_id"] = b["id"]
        cards.append(fields)

    df = pd.DataFrame(cards)
    xlsx_path = to_excel_file(cards)
    return df, xlsx_path
|
| 47 |
+
|
| 48 |
+
def clear_all():
    """Return reset values for every output of the Clear button.

    Six values are returned, in order: two ``None`` values, the empty
    boxes JSON ``"[]"``, a hide update, an empty DataFrame and ``None``.
    """
    return (
        None,
        None,
        "[]",
        gr.update(visible=False),
        pd.DataFrame(),
        None,
    )
|
| 50 |
+
|
| 51 |
+
# --- UI ---
|
| 52 |
+
with gr.Blocks(title="BizCards Extractor (Gradio)") as demo:
    gr.Markdown("## 💼 BizCards Extractor\nUpload → **Segment** → **Extract** → **Download Excel**")

    # Show a visible notice when no credentials are configured.
    # gr.Warning is meant to be called from inside an event handler; called
    # while the layout is being built it never reaches the browser, so a
    # Markdown banner is used instead. The Vertex flag accepts the values
    # "1", "true" and "yes" (case-insensitive).
    _use_vertex = os.getenv("GOOGLE_GENAI_USE_VERTEXAI", "false").strip().lower() in ("1", "true", "yes")
    if not os.getenv("GOOGLE_API_KEY") and not _use_vertex:
        gr.Markdown("⚠️ GOOGLE_API_KEY is not set. Add it in Space → Settings → Variables & secrets.")

    with gr.Row():
        with gr.Column(scale=3):
            in_img = gr.Image(type="numpy", label="Upload single or multi-card photo")
            with gr.Row():
                btn_seg = gr.Button("Segment", variant="primary")
                btn_ext = gr.Button("Extract", variant="secondary")
                btn_clear = gr.Button("Clear")
        with gr.Column(scale=2):
            out_img = gr.Image(label="Segmented preview (boxes)", interactive=False)
            out_table = gr.Dataframe(
                headers=["box_id", "company", "person_romaji", "person_kanji", "person_kana",
                         "title", "department", "email", "phone", "website", "address_jp", "notes", "source_name"],
                wrap=True, height=350
            )
            dl = gr.File(label="Download Excel", visible=False)

    # hidden state for boxes in JSON
    boxes_state = gr.Textbox(label="boxes_json (debug)", visible=False, value="[]")

    # wiring
    # NOTE(review): `dl` appears twice in two output lists (once for the
    # visibility update, once for the value). The handlers return one value
    # per listed output, so the lists and the handlers must change together.
    btn_seg.click(fn=do_segment, inputs=[in_img],
                  outputs=[out_img, boxes_state, dl, dl])
    btn_ext.click(fn=do_extract, inputs=[in_img, boxes_state],
                  outputs=[out_table, dl])
    btn_clear.click(fn=clear_all, inputs=[],
                    outputs=[in_img, out_img, boxes_state, dl, out_table, dl])

if __name__ == "__main__":
    demo.queue(max_size=16).launch()
|
biz/gemini.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os, json
|
| 2 |
+
from typing import Dict
|
| 3 |
+
from google import genai
|
| 4 |
+
from google.genai import types
|
| 5 |
+
|
| 6 |
+
# Prompt text describing the JSON object the model must return for one card.
SYSTEM_INSTRUCTIONS = """
You read a cropped Japanese business card image and return strict JSON fields.

Return ONLY this JSON:
{
"person_kanji": "...", "person_kana": "...", "person_romaji": "...",
"title": "...", "department": "...", "company": "...",
"email": ["..."], "phone": ["..."],
"website": "...", "address_jp": "...", "notes": "..."
}
Use null / [] for unknowns. Romaji: family-name LAST. No extra text.
"""
|
| 18 |
+
|
| 19 |
+
def make_client():
    """Build a ``genai.Client`` from environment variables.

    Vertex AI is used when ``GOOGLE_GENAI_USE_VERTEXAI`` is ``1``, ``true``
    or ``yes`` (case-insensitive). That mode requires
    ``GOOGLE_CLOUD_PROJECT`` and reads ``GOOGLE_CLOUD_LOCATION``
    (default ``us-central1``). Otherwise ``GOOGLE_API_KEY`` is used.

    Raises:
        RuntimeError: If the variable required by the selected mode is
            missing or empty.
    """
    flag = os.getenv("GOOGLE_GENAI_USE_VERTEXAI", "false").strip().lower()
    if flag in ("1", "true", "yes"):
        project = os.getenv("GOOGLE_CLOUD_PROJECT")
        if not project:
            raise RuntimeError("Set GOOGLE_CLOUD_PROJECT or use GOOGLE_API_KEY.")
        location = os.getenv("GOOGLE_CLOUD_LOCATION", "us-central1")
        return genai.Client(vertexai=True, project=project, location=location)

    # Strip stray whitespace (e.g. a pasted trailing newline) so that a
    # blank value is reported as missing instead of being sent as a key.
    api_key = (os.getenv("GOOGLE_API_KEY") or "").strip()
    if not api_key:
        raise RuntimeError("Missing GOOGLE_API_KEY.")
    return genai.Client(api_key=api_key)
|
| 31 |
+
|
| 32 |
+
def extract_from_crop(image_bytes: bytes, source_name: str) -> Dict:
    """Ask Gemini to read one card crop and return its fields as a dict.

    Args:
        image_bytes: Image data sent with MIME type ``image/png``.
        source_name: Value stored under the ``"source_name"`` key.

    Returns:
        The decoded JSON object with ``"source_name"`` added. If the reply
        cannot be decoded into a JSON object, the dict contains only
        ``"source_name"``.
    """
    client = make_client()
    img_part = types.Part.from_bytes(data=image_bytes, mime_type="image/png")
    resp = client.models.generate_content(
        model="gemini-2.5-flash",
        contents=[types.Content(role="user", parts=[
            # Keyword form works whether or not `text` is keyword-only.
            types.Part.from_text(text=SYSTEM_INSTRUCTIONS), img_part
        ])],
        config=types.GenerateContentConfig(
            temperature=0.2, response_mime_type="application/json"
        )
    )
    try:
        data = json.loads(resp.text)
    except (TypeError, ValueError):  # missing text or invalid JSON
        data = {}

    # Valid JSON is not necessarily an object: tolerate a one-element list
    # wrapping the object, and fall back to an empty dict for anything else
    # (null, string, number) so the assignment below cannot fail.
    if isinstance(data, list) and len(data) == 1 and isinstance(data[0], dict):
        data = data[0]
    if not isinstance(data, dict):
        data = {}

    data["source_name"] = source_name
    return data
|
biz/segmentation.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import cv2, numpy as np
|
| 2 |
+
from typing import List, Tuple, TypedDict
|
| 3 |
+
import logging
|
| 4 |
+
log = logging.getLogger(__name__)
|
| 5 |
+
|
| 6 |
+
class Box(TypedDict):
    """Dictionary shape used for one detected card box."""

    id: str        # label such as "card-0"
    x: int         # left edge
    y: int         # top edge
    w: int         # width
    h: int         # height
    angle: float   # rotation angle associated with the box
|
| 8 |
+
|
| 9 |
+
# Target length, in pixels, for the longer side of the working image.
LONG_SIDE = 1800
# Accepted contour area, as a fraction of the working image area.
MIN_AREA_FRAC = 0.006
MAX_AREA_FRAC = 0.98
# Accepted aspect-ratio range for a candidate's rotated rectangle.
AR_MIN, AR_MAX = 0.55, 2.80
# Minimum mean gray level inside a candidate rectangle.
WHITENESS_MIN = 120
|
| 14 |
+
|
| 15 |
+
def _resize_keep(img, long_side=LONG_SIDE):
    """Downscale ``img`` so that its longer side is at most ``long_side``.

    Returns:
        ``(image, scale)``. Images that already fit, and images with a
        zero-length side, are returned unchanged with a scale of ``1.0``;
        an image is never enlarged.
    """
    h, w = img.shape[:2]
    longest = max(h, w)
    if longest <= 0:
        # Empty image: nothing to scale, and dividing would fail.
        return img, 1.0

    scale = long_side / longest
    if scale >= 1.0:
        return img, 1.0

    # Keep both sides at least 1 px so extreme aspect ratios stay resizable.
    new_size = (max(1, int(w * scale)), max(1, int(h * scale)))
    return cv2.resize(img, new_size, interpolation=cv2.INTER_AREA), scale
|
| 23 |
+
|
| 24 |
+
def _iou(a,b):
|
| 25 |
+
xa1,ya1,xa2,ya2=a; xb1,yb1,xb2,yb2=b
|
| 26 |
+
inter = max(0,min(xa2,xb2)-max(xa1,xb1)) * max(0,min(ya2,yb2)-max(ya1,yb1))
|
| 27 |
+
if inter == 0: return 0.0
|
| 28 |
+
area_a=(xa2-xa1)*(ya2-ya1); area_b=(xb2-xb1)*(yb2-yb1)
|
| 29 |
+
return inter / (area_a+area_b-inter+1e-6)
|
| 30 |
+
|
| 31 |
+
def segment_cards(image_bgr) -> Tuple[List[Box], int, int]:
    """Find bright, card-shaped rectangles in a BGR image.

    The image is downscaled, thresholded with Otsu's method and cleaned up
    with an open/close pass. Each external contour is then filtered by
    area, rotated-rectangle aspect ratio and mean brightness, and
    overlapping candidates are dropped, keeping the larger ones.

    Returns:
        ``(boxes, width, height)`` where ``width`` and ``height`` are the
        size of ``image_bgr`` and the boxes are axis-aligned, in the pixel
        coordinates of ``image_bgr``, ordered by ``(y, x)`` and labelled
        ``card-0``, ``card-1``, ... in that order. If no candidate
        survives, one box covering the image minus a 3% margin is returned.
    """
    H0, W0 = image_bgr.shape[:2]
    work, s = _resize_keep(image_bgr, LONG_SIDE)
    H, W = work.shape[:2]
    back = 1.0 / s  # factor mapping working coordinates to the original image

    gray = cv2.cvtColor(work, cv2.COLOR_BGR2GRAY)
    gray = cv2.bilateralFilter(gray, 7, 50, 50)
    _, bin_ = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    # Pass the iteration count by keyword: the fourth positional parameter
    # of morphologyEx is the output array, not the iteration count.
    open_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    close_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 9))
    bin_ = cv2.morphologyEx(bin_, cv2.MORPH_OPEN, open_kernel, iterations=1)
    bin_ = cv2.morphologyEx(bin_, cv2.MORPH_CLOSE, close_kernel, iterations=1)

    cnts, _ = cv2.findContours(bin_, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    min_area = MIN_AREA_FRAC * (W * H)
    max_area = MAX_AREA_FRAC * (W * H)

    cand = []
    for c in cnts:
        area = cv2.contourArea(c)
        if area < min_area or area > max_area:
            continue
        rect = cv2.minAreaRect(c)
        _, (rw, rh), ang = rect
        if rw < 10 or rh < 10:
            continue
        # Long side over short side, so the ratio is never below 1.
        ar = max(rw, rh) / max(1.0, min(rw, rh))
        if ar < AR_MIN or ar > AR_MAX:
            continue

        pts = cv2.boxPoints(rect).astype(int)
        mask = np.zeros((H, W), np.uint8)
        cv2.drawContours(mask, [pts], -1, 255, -1)
        if cv2.mean(gray, mask=mask)[0] < WHITENESS_MIN:
            continue

        # Scale the corners back to the original image and clamp every edge,
        # so a box that pokes outside the frame is trimmed, not shifted.
        xs, ys = pts[:, 0], pts[:, 1]
        left = max(0, int(xs.min() * back))
        top = max(0, int(ys.min() * back))
        right = min(W0, int(xs.max() * back))
        bottom = min(H0, int(ys.max() * back))
        w, h = right - left, bottom - top
        if w <= 0 or h <= 0:
            continue
        cand.append((left, top, w, h, float(ang)))

    # Greedy overlap suppression: the largest boxes win.
    cand.sort(key=lambda z: z[2] * z[3], reverse=True)
    picked = []
    for x, y, w, h, ang in cand:
        rect = (x, y, x + w, y + h)
        if all(_iou(rect, (px, py, px + pw, py + ph)) < 0.20
               for px, py, pw, ph, _ in picked):
            picked.append((x, y, w, h, ang))

    if not picked:
        margin = int(min(W0, H0) * 0.03)
        log.info("No contours -> fallback to full-image box.")
        fallback = {
            "id": "card-0",
            "x": margin,
            "y": margin,
            "w": max(1, W0 - 2 * margin),
            "h": max(1, H0 - 2 * margin),
            "angle": 0.0,
        }
        return [fallback], W0, H0

    # Order top-to-bottom, then left-to-right, and number the boxes in that
    # order so the ids follow the order of the returned list.
    picked.sort(key=lambda p: (p[1], p[0]))
    boxes = [
        {"id": f"card-{i}", "x": x, "y": y, "w": w, "h": h, "angle": ang}
        for i, (x, y, w, h, ang) in enumerate(picked)
    ]
    return boxes, W0, H0
|
biz/utils.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import io
|
| 2 |
+
from typing import List, Dict
|
| 3 |
+
from PIL import Image, ImageDraw
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import uuid, os
|
| 6 |
+
|
| 7 |
+
def crop_to_png_bytes(img_pil: Image.Image, x: int, y: int, w: int, h: int) -> bytes:
    """Crop the box ``(x, y, w, h)`` out of ``img_pil`` and encode it as PNG.

    The box is clamped to the image and always covers at least one pixel,
    so boxes that are partly outside the image or have a non-positive size
    still produce a valid crop. Boxes fully inside the image are cropped
    exactly as given.
    """
    left = min(max(0, int(x)), img_pil.width - 1)
    top = min(max(0, int(y)), img_pil.height - 1)
    right = max(left + 1, min(img_pil.width, int(x) + int(w)))
    bottom = max(top + 1, min(img_pil.height, int(y) + int(h)))

    with io.BytesIO() as buf:
        img_pil.crop((left, top, right, bottom)).save(buf, format="PNG")
        return buf.getvalue()
|
| 12 |
+
|
| 13 |
+
def overlay_boxes(img_pil: Image.Image, boxes: List[Dict],
                  color=(124, 58, 237), width: int = 6) -> Image.Image:
    """Return a copy of ``img_pil`` with every box outlined.

    Args:
        img_pil: Source image; it is not modified.
        boxes: Dicts providing ``x``, ``y``, ``w`` and ``h``.
        color: Outline colour passed to ``ImageDraw.rectangle``.
        width: Outline width passed to ``ImageDraw.rectangle``.
    """
    out = img_pil.copy()
    draw = ImageDraw.Draw(out)
    for box in boxes:
        x, y, w, h = box["x"], box["y"], box["w"], box["h"]
        draw.rectangle([x, y, x + w, y + h], outline=color, width=width)
    return out
|
| 20 |
+
|
| 21 |
+
def _join_values(value) -> str:
    """Render a multi-valued field (list, bare string or ``None``) as text."""
    if value is None:
        return ""
    if isinstance(value, (list, tuple)):
        return ", ".join(str(v) for v in value if v is not None)
    return str(value)


def to_excel_file(cards: List[Dict]) -> str:
    """Write the extracted cards to a new ``.xlsx`` file and return its path.

    Args:
        cards: One dict per card. Missing keys become empty cells. ``email``
            and ``phone`` may be a list, a single string or ``None``; list
            items are joined with ``", "``.

    Returns:
        Path of a uniquely named workbook in the system temp directory,
        holding a single sheet called ``Contacts``. The header row is
        written even when ``cards`` is empty.
    """
    import tempfile  # local import: only this function needs it

    columns = [
        "box_id", "company", "person_kanji", "person_kana", "person_romaji",
        "title", "department", "email", "phone", "website", "address_jp",
        "notes", "source_name",
    ]
    multi_valued = {"email", "phone"}

    rows = [
        {
            col: _join_values(card.get(col)) if col in multi_valued else card.get(col)
            for col in columns
        }
        for card in cards
    ]
    df = pd.DataFrame(rows, columns=columns)

    path = os.path.join(
        tempfile.gettempdir(), f"contacts-{uuid.uuid4().hex[:8]}.xlsx"
    )
    with pd.ExcelWriter(path, engine="openpyxl") as writer:
        df.to_excel(writer, index=False, sheet_name="Contacts")
    return path
|
requirements.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio>=4.31.0
|
| 2 |
+
opencv-python-headless==4.10.0.84
|
| 3 |
+
pillow==10.4.0
|
| 4 |
+
pandas==2.2.2
|
| 5 |
+
openpyxl==3.1.5
|
| 6 |
+
google-genai==0.3.0
|
| 7 |
+
numpy>=1.26
|