import os
from fastapi.responses import HTMLResponse
from gradio import Server
from transformers import pipeline
import spaces
# Application object that the API endpoint and the page route below are registered on.
app = Server()

# The token-classification pipeline is built once at import time so that every
# request reuses the same loaded model.
print("Loading OpenAI Privacy Filter model...")
classifier = pipeline("token-classification", model="openai/privacy-filter")
print("Model loaded successfully.")
def _clean_token(token: str) -> str:
    """Strip sub-word markers ("##" WordPiece, "Ġ" byte-level BPE) from a raw token."""
    return token.replace("##", "").replace("Ġ", " ")


def _span_word(text: str, start, end, fallback: str) -> str:
    """Return the exact input slice for [start, end), or *fallback* if offsets are missing."""
    if start is None or end is None:
        return fallback
    return text[start:end]


@app.api(name="predict")
@spaces.GPU
def predict(text: str) -> list:
    """Detect PII in *text* and return one merged span per detected entity.

    The classifier emits one result per token, labelled with a positional
    prefix (``B-``/``S-`` open a span, ``I-``/``E-`` continue one). Consecutive
    results belonging to the same entity type are folded into a single span.

    Args:
        text: The raw input string to scan.

    Returns:
        A list of dicts, in order of appearance, each with the keys:
        ``entity`` (label without its positional prefix), ``score`` (the score
        of the span's first token, as a plain ``float``), ``start`` / ``end``
        (offsets reported by the classifier) and ``word`` (the span's text).
        An empty list is returned for empty or whitespace-only input.
    """
    # Nothing to classify; skip the model call entirely.
    if not text or not text.strip():
        return []

    merged = []
    current = None
    for token in classifier(text):
        label = token["entity"]

        # An explicit "outside" token closes whatever span is open.
        # NOTE(review): the pipeline may already drop "O" results before they
        # reach this loop (see its `ignore_labels` option) -- confirm; if so,
        # spans are only ever closed by the next opening token.
        if label == "O":
            if current is not None:
                merged.append(current)
                current = None
            continue

        # "B-email" -> "email"; labels without a dash are kept unchanged.
        base_label = label.split("-", 1)[-1]

        continues_current = (
            label.startswith(("I-", "E-"))
            and current is not None
            and current["entity"] == base_label
        )
        if continues_current:
            current["end"] = token["end"]
            # Prefer slicing the original text: it is exact regardless of the
            # tokenizer's sub-word markers or whitespace handling. Token
            # concatenation is only a fallback when no offsets are available.
            current["word"] = _span_word(
                text,
                current["start"],
                current["end"],
                current["word"] + _clean_token(token["word"]),
            )
            continue

        # Any other token (B-/S-, an unprefixed label, or an I-/E- that does
        # not match the open span) starts a new span.
        if current is not None:
            merged.append(current)
        current = {
            "entity": base_label,
            "score": float(token["score"]),
            "start": token["start"],
            "end": token["end"],
            "word": _span_word(
                text,
                token["start"],
                token["end"],
                _clean_token(token["word"]).lstrip(),
            ),
        }

    if current is not None:
        merged.append(current)
    return merged
@app.get("/")
async def homepage():
    """Serve the ``index.html`` file that sits next to this module as the root page."""
    module_dir = os.path.dirname(os.path.abspath(__file__))
    page_path = os.path.join(module_dir, "index.html")
    with open(page_path, "r", encoding="utf-8") as page:
        markup = page.read()
    return HTMLResponse(content=markup)
# Start the server only when this file is executed directly, not when imported.
if __name__ == "__main__":
    app.launch(show_error=True)