# manikrishneshwar's picture
# changed UI
# 5e2fdfe verified
"""
Gradio Space for the BERT+GAT PII redactor.
Drop this folder into a HuggingFace Space (SDK = gradio) and edit
``MODEL_REPO`` below to point at your model repo. Anyone visiting the
Space can paste text and see the redacted output, with each detected
span highlighted by category.
"""
import gradio as gr
import torch
from transformers import AutoModel, AutoTokenizer
# --------------------------------------------------------------------------- #
# Configuration
# --------------------------------------------------------------------------- #
MODEL_REPO = "manikrishneshwar/pii-redactor-bert-gat"

# Use CUDA when torch reports it as available, otherwise fall back to CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
DEVICE = torch.device(_device_name)

print(f"Loading {MODEL_REPO} on {DEVICE} ...")

# Both loaders are called with trust_remote_code=True.
# NOTE(review): presumably required because the repo ships its own model
# class (redact() calls a ``predict`` method on it) — confirm, and only point
# MODEL_REPO at repositories you trust.
_hub_kwargs = {"trust_remote_code": True}
tokenizer = AutoTokenizer.from_pretrained(MODEL_REPO, **_hub_kwargs)
model = AutoModel.from_pretrained(MODEL_REPO, **_hub_kwargs)
# Move the weights to the chosen device and switch to eval mode.
model = model.to(DEVICE).eval()

print("Ready.")
# --------------------------------------------------------------------------- #
# Inference + Gradio formatting
# --------------------------------------------------------------------------- #
def redact(text: str):
    """Run the PII model on *text* and shape the result for the Gradio UI.

    Args:
        text: Raw user input. Falsy or whitespace-only input short-circuits
            without calling the model.

    Returns:
        A 3-tuple ``(segments, redacted, summary)`` where

        * ``segments`` is a list of ``(substring, label-or-None)`` pairs
          covering *text* from left to right (the format consumed by
          ``gr.HighlightedText``),
        * ``redacted`` is the ``"redacted"`` entry of the model result,
        * ``summary`` is a Markdown bullet list with one line per span, or a
          placeholder message when no span was returned.
    """
    if not (text and text.strip()):
        return [], "", "_(empty input)_"

    prediction = model.predict(text, tokenizer, device=DEVICE)
    found = prediction["spans"]

    # Walk the spans in order of their start offset, emitting the unlabelled
    # gap in front of each span and then the labelled span itself.
    segments = []
    pos = 0
    for span in sorted(found, key=lambda item: item["start"]):
        begin, end = span["start"], span["end"]
        if begin > pos:
            segments.append((text[pos:begin], None))
        segments.append((text[begin:end], span["label"]))
        pos = end
    # Anything after the last span is plain, unlabelled text.
    if pos < len(text):
        segments.append((text[pos:], None))

    # The summary lists spans in the order the model returned them.
    if found:
        bullets = [
            f"- **{span['label']}**: `{span['value']}` "
            f"(chars {span['start']}-{span['end']})"
            for span in found
        ]
        summary = "\n".join(bullets)
    else:
        summary = "_No PII detected._"

    return segments, prediction["redacted"], summary
# Sample sentences offered to the user. Each one is wrapped in its own
# one-element list below, giving EXAMPLES one row per sentence.
_SAMPLE_SENTENCES = (
    "Email me at john.doe@example.com or call 555-123-4567 anytime.",
    "My SSN is 123-45-6789 and my DOB is 03/15/1992.",
    "Send the wire to IBAN GB82 WEST 1234 5698 7654 32, routing 021000021.",
    "Customer Jane Smith lives at 742 Evergreen Terrace, Springfield, IL.",
)
EXAMPLES = [[sentence] for sentence in _SAMPLE_SENTENCES]
# Build the Gradio page: header, input box, Redact button, three result
# areas, event wiring, and a set of sample inputs.
# NOTE(review): the indentation of this section was not preserved in the copy
# under review. The nesting below (only the input Textbox inside the Row,
# everything else directly under Blocks) is an assumption — confirm against
# the deployed layout.
with gr.Blocks(title="PII Redactor - BERT + GAT") as demo:
    # Page title and short usage instructions.
    gr.Markdown(
        "# PII Redactor - BERT + GAT\n"
        "Token-level PII detection across 15 categories. Built on a "
        "BERT encoder with a Graph Attention Network refinement stage.\n\n"
        "Paste any text below, then click **Redact**."
    )
    with gr.Row():
        inp = gr.Textbox(
            label="Input text",
            placeholder="Paste text containing potential PII...",
            lines=4,
        )
    btn = gr.Button("Redact", variant="primary")
    # First output: the original text with detected spans labelled.
    gr.Markdown("### PII Identified")
    highlighted = gr.HighlightedText(
        label="",
        combine_adjacent=True,
        # show_legend=True,
    )
    # Second output: the redacted string returned by the model.
    gr.Markdown("### Redacted text")
    redacted_box = gr.Textbox(label="", lines=4)
    # Third output: Markdown list of the detected spans.
    gr.Markdown("### Detected entities")
    detected_md = gr.Markdown()
    # Clicking the button and submitting the textbox both call redact(); its
    # three return values are routed to the three outputs in this order.
    btn.click(redact, inputs=inp, outputs=[highlighted, redacted_box, detected_md])
    inp.submit(redact, inputs=inp, outputs=[highlighted, redacted_box, detected_md])
    # Sample inputs bound to the input textbox.
    gr.Examples(EXAMPLES, inputs=inp, label="Try one of these")
# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()