Spaces:
Sleeping
Sleeping
"""
Gradio Space for the BERT+GAT PII redactor.

Drop this folder into a Hugging Face Space (SDK = gradio) and edit
``MODEL_REPO`` below to point at your model repo. Anyone visiting the
Space can paste text and see the redacted output, with each detected
span highlighted by category.
"""
| import gradio as gr | |
| import torch | |
| from transformers import AutoModel, AutoTokenizer | |
# --------------------------------------------------------------------------- #
# Configuration
# --------------------------------------------------------------------------- #
# Hub repository that provides both the tokenizer and the model weights/code.
MODEL_REPO = "manikrishneshwar/pii-redactor-bert-gat"

# Use the GPU when torch can see one, otherwise stay on the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(f"Loading {MODEL_REPO} on {DEVICE} ...")

# NOTE(review): trust_remote_code=True executes Python shipped in MODEL_REPO;
# only point MODEL_REPO at a repository you trust.
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_REPO,
    trust_remote_code=True,
)
model = AutoModel.from_pretrained(
    MODEL_REPO,
    trust_remote_code=True,
)
# Move the weights to the chosen device and switch to inference mode.
model = model.to(DEVICE).eval()

print("Ready.")
| # --------------------------------------------------------------------------- # | |
| # Inference + Gradio formatting | |
| # --------------------------------------------------------------------------- # | |
def redact(text: str):
    """Run PII detection on *text* and shape the result for the Gradio UI.

    Args:
        text: Raw text entered by the user.

    Returns:
        A 3-tuple ``(highlights, redacted, detected_md)``:

        * ``highlights`` - list of ``(substring, label_or_None)`` tuples
          covering the input left to right, in the form Gradio's
          ``HighlightedText`` component expects.
        * ``redacted`` - the ``"redacted"`` entry of the model's prediction.
        * ``detected_md`` - Markdown bullet list with one line per detected
          span, or a placeholder when nothing was detected.

        Blank or whitespace-only input short-circuits to
        ``([], "", "_(empty input)_")`` without calling the model.
    """
    if not text or not text.strip():
        return [], "", "_(empty input)_"

    result = model.predict(text, tokenizer, device=DEVICE)
    spans = result["spans"]

    # Build the (text, label-or-None) tuples Gradio's HighlightedText expects.
    highlights = []
    cursor = 0
    for sp in sorted(spans, key=lambda s: s["start"]):
        # Clip to the cursor so overlapping or nested spans never re-emit
        # text that has already been placed in ``highlights``.
        start = max(sp["start"], cursor)
        end = sp["end"]
        if end <= start:
            # Entirely covered by an earlier span (or empty): nothing to show.
            continue
        if cursor < start:
            highlights.append((text[cursor:start], None))
        highlights.append((text[start:end], sp["label"]))
        cursor = end
    if cursor < len(text):
        highlights.append((text[cursor:], None))

    if spans:
        detected_md = "\n".join(
            f"- **{sp['label']}**: `{sp['value']}` "
            f"(chars {sp['start']}-{sp['end']})"
            for sp in spans
        )
    else:
        detected_md = "_No PII detected._"

    return highlights, result["redacted"], detected_md
# Sample inputs offered below the demo. Each row is a one-element list:
# one value for the single input textbox.
EXAMPLES = [
    [sample]
    for sample in (
        "Email me at john.doe@example.com or call 555-123-4567 anytime.",
        "My SSN is 123-45-6789 and my DOB is 03/15/1992.",
        "Send the wire to IBAN GB82 WEST 1234 5698 7654 32, routing 021000021.",
        "Customer Jane Smith lives at 742 Evergreen Terrace, Springfield, IL.",
    )
]
# Page layout, top to bottom: intro text, input box and button, then the
# three result panes (highlighted text, redacted text, entity list).
with gr.Blocks(title="PII Redactor - BERT + GAT") as demo:
    gr.Markdown(
        "# PII Redactor - BERT + GAT\n"
        "Token-level PII detection across 15 categories. Built on a "
        "BERT encoder with a Graph Attention Network refinement stage.\n\n"
        "Paste any text below, then click **Redact**."
    )

    with gr.Row():
        inp = gr.Textbox(
            lines=4,
            label="Input text",
            placeholder="Paste text containing potential PII...",
        )
    btn = gr.Button("Redact", variant="primary")

    gr.Markdown("### PII Identified")
    # show_legend is left at its default.
    highlighted = gr.HighlightedText(label="", combine_adjacent=True)

    gr.Markdown("### Redacted text")
    redacted_box = gr.Textbox(label="", lines=4)

    gr.Markdown("### Detected entities")
    detected_md = gr.Markdown()

    # Clicking the button and pressing Enter in the textbox both run the
    # same handler and fill the same three outputs.
    btn.click(
        fn=redact,
        inputs=inp,
        outputs=[highlighted, redacted_box, detected_md],
    )
    inp.submit(
        fn=redact,
        inputs=inp,
        outputs=[highlighted, redacted_box, detected_md],
    )

    gr.Examples(examples=EXAMPLES, inputs=inp, label="Try one of these")


# Start the server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()