ysharma's picture
ysharma HF Staff
Update app.py
f78f0c3 verified
"""
DLP Paste-Proxy — "Pastebin with a conscience"
================================================
A sleek paste-to-share service. The author pastes PII-rich text and gets
a shareable URL. Recipients at that URL see the OPF-redacted version by
default; a separate "reveal" link guarded by an unguessable token shows
the original.
Why gr.Server? We need four HTTP surfaces that don't map cleanly onto
gr.Blocks event wiring:
* @server.api create_paste - accept paste, run OPF, mint IDs
(queued compute → @gradio/client)
* GET /view/{id} - public redacted view page
* GET /view/{id}?token=... - author's reveal page
* GET /api/paste/{pid} - JSON lookup of an existing paste
plus a background sweeper for auto-expiry.
The create_paste endpoint is the only one that runs the OPF model,
so it is the only one that needs Gradio's queue + ZeroGPU wiring —
the other three are pure lookup/rendering and are served as plain
FastAPI routes, which the gradio.Server blog recommends for static
content.
Storage is an in-process dict. That is fine for a public demo β€” the
point is to illustrate the request-composition model; it is NOT a
durable pastebin. Restarting the Space clears all pastes.
"""
from __future__ import annotations
import html
import json
import os
import secrets
import threading
import time
from dataclasses import dataclass
from typing import Optional
import gradio as gr
from fastapi.responses import HTMLResponse, JSONResponse
# spaces is only available on Hugging Face Spaces; degrade gracefully
# when running locally so `python app.py` still works off-GPU.
try:
import spaces
_HAS_SPACES = True
except ImportError:
_HAS_SPACES = False
from opf import predict_text
# ── configuration ─────────────────────────────────────────────────
# Maximum accepted paste length in characters (overridable via env var);
# create_paste_api rejects anything longer.
MAX_PASTE_CHARS = int(os.getenv("MAX_PASTE_CHARS", "50000"))
# Seconds the background sweeper sleeps between expiry passes.
SWEEP_INTERVAL_SEC = int(os.getenv("SWEEP_INTERVAL_SEC", "30"))
# Accepted `ttl` values mapped to a lifetime in seconds; None means the
# paste never expires.
TTL_CHOICES: dict[str, Optional[int]] = {
    "never": None,
    "1h": 60 * 60,
    "24h": 60 * 60 * 24,
    "7d": 60 * 60 * 24 * 7,
}
# Display metadata (pill color + human-readable label) keyed by the span
# label used for lookups in the rendering helpers.
CATEGORIES_META = {
    "private_person": {"color": "#E24B4A", "label": "Person"},
    "private_date": {"color": "#1E7DD1", "label": "Date"},
    "private_address": {"color": "#1D9E75", "label": "Address"},
    "private_email": {"color": "#0EA5A1", "label": "Email"},
    "account_number": {"color": "#BA7517", "label": "Account"},
    "private_url": {"color": "#D85A30", "label": "URL"},
    "secret": {"color": "#52525b", "label": "Secret"},
    "private_phone": {"color": "#639922", "label": "Phone"},
}
# ── paste store ───────────────────────────────────────────────────
@dataclass
class Paste:
    """One stored paste: the original text, its redacted form, and metadata."""

    # Public identifier used in /view/{id} and /api/paste/{id}.
    id: str
    # Secret that, when supplied as ?token=, unlocks the original text.
    reveal_token: str
    # Source text as returned by the OPF call.
    original: str
    # `original` with detected spans replaced by <CATEGORY> placeholders.
    redacted: str
    # Detected spans; each dict is read via its "start", "end" and "label" keys.
    spans: list[dict]
    # Summary produced by compute_stats().
    stats: dict
    # Creation time as a time.time() timestamp.
    created_at: float
    # Expiry time as a time.time() timestamp, or None for "never".
    expires_at: Optional[float]
    # Number of redacted-view page hits.
    views: int = 0
    # Number of successful reveal-page hits.
    reveals: int = 0
# In-process paste store keyed by paste id; contents are lost on restart.
PASTES: dict[str, Paste] = {}
# Re-entrant lock taken around PASTES access and the per-paste view counters.
LOCK = threading.RLock()
def _store_put(paste: Paste) -> None:
    """Register *paste* in the store under its id, replacing any entry with that id."""
    key = paste.id
    with LOCK:
        PASTES[key] = paste
def _store_get(pid: str) -> Optional[Paste]:
    """Look up a paste by id.

    Returns the stored paste, or None when the id is unknown or the paste
    has passed its expiry time. An expired paste is evicted on the spot
    rather than waiting for the background sweeper.
    """
    with LOCK:
        entry = PASTES.get(pid)
        if entry is None:
            return None
        deadline = entry.expires_at
        still_valid = deadline is None or deadline > time.time()
        if still_valid:
            return entry
        # Lazy eviction: drop the stale entry as soon as someone asks for it.
        PASTES.pop(pid, None)
        return None
def _sweep_loop() -> None:
    """Run forever, periodically deleting every paste whose expiry has passed.

    Each cycle sleeps SWEEP_INTERVAL_SEC seconds, then collects the expired
    ids first and removes them afterwards so the dict is never mutated
    while it is being iterated.
    """
    while True:
        time.sleep(SWEEP_INTERVAL_SEC)
        cutoff = time.time()
        with LOCK:
            stale = []
            for key, entry in PASTES.items():
                if entry.expires_at is None:
                    continue  # "never" pastes are not swept
                if entry.expires_at <= cutoff:
                    stale.append(key)
            for key in stale:
                PASTES.pop(key, None)
# Start the expiry sweeper as soon as the module is imported. It is a daemon
# thread, so it does not keep the process alive on shutdown.
threading.Thread(target=_sweep_loop, daemon=True, name="paste-sweeper").start()
# ── redaction ─────────────────────────────────────────────────────
def redact(text: str, spans: list[dict]) -> str:
    """Replace every detected span in *text* with a ``<CATEGORY>`` placeholder.

    Args:
        text: The source text the span offsets refer to.
        spans: Dicts with integer ``"start"``/``"end"`` offsets (end
            exclusive) and a ``"label"`` string. Order does not matter.

    Returns:
        The text with each span replaced by its upper-cased label in angle
        brackets.

    The model output is expected to be non-overlapping, but this function
    fails safe if it is not: when spans overlap or nest, the *union* of
    their ranges is removed and a single placeholder (that of the
    earliest-starting span) is emitted, so no character covered by any
    span can leak into the redacted output. Offsets are clamped to the
    text bounds, and empty or inverted spans are ignored.
    """
    limit = len(text)
    pieces: list[str] = []
    cursor = 0  # end of the region already copied or redacted
    for sp in sorted(spans, key=lambda s: (s["start"], s["end"])):
        start = max(0, sp["start"])
        end = min(limit, sp["end"])
        if end <= start:
            continue  # empty, inverted, or entirely out-of-range span
        if start < cursor:
            # Overlaps the previous span: widen the redacted region instead
            # of dropping this span, which would expose its tail.
            cursor = max(cursor, end)
            continue
        pieces.append(text[cursor:start])
        pieces.append(f"<{sp['label'].upper()}>")
        cursor = end
    pieces.append(text[cursor:])
    return "".join(pieces)
def compute_stats(text: str, spans: list[dict]) -> dict:
    """Summarize how much of *text* the detected spans cover.

    Returns a dict with the total character count, the summed span length,
    that sum as a percentage of the text (one decimal, 0.0 for empty
    text), the number of spans, and per-label ``count``/``chars`` totals.
    """
    total_chars = len(text)
    flagged_chars = 0
    per_category: dict[str, dict[str, int]] = {}
    for span in spans:
        width = span["end"] - span["start"]
        flagged_chars += width
        bucket = per_category.setdefault(span["label"], {"count": 0, "chars": 0})
        bucket["count"] += 1
        bucket["chars"] += width

    if total_chars:
        percentage = round(flagged_chars / total_chars * 100, 1)
    else:
        percentage = 0.0

    return {
        "total_chars": total_chars,
        "pii_chars": flagged_chars,
        "pii_percentage": percentage,
        "total_spans": len(spans),
        "categories": per_category,
    }
# ── OPF call (GPU-gated on HF Spaces) ─────────────────────────────
def analyze(text: str):
    """Run the OPF model on *text* and return `predict_text`'s result unchanged."""
    return predict_text(text)


# When the `spaces` package is importable, wrap the call with spaces.GPU;
# otherwise the plain function above is used as-is.
if _HAS_SPACES:
    analyze = spaces.GPU(analyze)
# ── gr.Server wiring ──────────────────────────────────────────────
# The single gr.Server instance; the routes and API endpoint defined below
# are all registered on it via its decorators.
server = gr.Server()
@server.get("/", response_class=HTMLResponse)
async def home():
    """Serve the compose page (the paste editor) at the site root."""
    page = HTMLResponse(_COMPOSE_HTML)
    return page
@server.api(name="create_paste")
def create_paste_api(text: str, ttl: str = "never") -> dict:
    """Scan a paste, store it, and return everything needed to build share URLs.

    The text is stripped, validated (non-empty, at most MAX_PASTE_CHARS
    characters, `ttl` one of TTL_CHOICES), run through `analyze`, redacted,
    and stored under a freshly minted id together with a reveal token.

    Returns:
        On success, a dict with the id, reveal token, view/reveal paths,
        expiry timestamp (or None), stats, redacted text and category
        metadata. On any validation or inference failure, a dict with a
        single ``"error"`` message instead; no exception is raised.

    This is a @server.api route rather than a plain FastAPI POST because it
    runs the GPU-decorated `analyze`, so requests go through Gradio's queue.
    """
    body = (text or "").strip()
    if not body:
        return {"error": "Paste is empty"}
    if len(body) > MAX_PASTE_CHARS:
        return {"error": f"Paste exceeds {MAX_PASTE_CHARS:,} characters"}
    if ttl not in TTL_CHOICES:
        return {"error": f"Unknown ttl {ttl!r}"}

    # Any failure in the model call (or in unpacking its result) is reported
    # to the caller as an error payload.
    try:
        source_text, spans = analyze(body)
    except Exception as exc:
        return {"error": f"OPF inference failed: {exc}"}

    redacted = redact(source_text, spans)
    stats = compute_stats(source_text, spans)

    pid = secrets.token_urlsafe(6)
    reveal_token = secrets.token_urlsafe(22)

    lifetime = TTL_CHOICES[ttl]
    created = time.time()
    expires_at = None if lifetime is None else created + lifetime

    record = Paste(
        id=pid,
        reveal_token=reveal_token,
        original=source_text,
        redacted=redacted,
        spans=spans,
        stats=stats,
        created_at=created,
        expires_at=expires_at,
    )
    _store_put(record)

    view_path = f"/view/{pid}"
    return {
        "id": pid,
        "reveal_token": reveal_token,
        "view_path": view_path,
        "reveal_path": f"/view/{pid}?token={reveal_token}",
        "expires_at": expires_at,
        "stats": stats,
        # Included so the frontend can render a preview without a second round-trip.
        "redacted": redacted,
        "categories_meta": CATEGORIES_META,
    }
@server.get("/view/{pid}", response_class=HTMLResponse)
async def view_paste(pid: str, token: Optional[str] = None):
    """Render the view page for a paste.

    Args:
        pid: Paste id from the URL path.
        token: Optional ``?token=`` query value; when it matches the paste's
            reveal token the original text is shown instead of the redacted
            one.

    Returns:
        A 404 HTML page when the paste is unknown or expired; otherwise the
        rendered view. Each hit increments either the paste's ``reveals`` or
        ``views`` counter before rendering.
    """
    p = _store_get(pid)
    if p is None:
        return HTMLResponse(_not_found_html(pid), status_code=404)
    # compare_digest() only accepts ASCII-only str and raises TypeError on
    # anything else, so a non-ASCII ?token= used to surface as an HTTP 500.
    # Comparing the UTF-8 bytes keeps the constant-time check and simply
    # yields "no match" for such tokens.
    revealed = bool(token) and secrets.compare_digest(
        token.encode("utf-8"), p.reveal_token.encode("utf-8")
    )
    with LOCK:
        if revealed:
            p.reveals += 1
        else:
            p.views += 1
    return HTMLResponse(_render_view(p, revealed))
@server.get("/api/paste/{pid}")
async def api_get_paste(pid: str, token: Optional[str] = None):
    """Read-only JSON lookup of an existing paste (no model call involved).

    Args:
        pid: Paste id from the URL path.
        token: Optional ``?token=`` query value; when it matches the paste's
            reveal token the response also carries the original text and
            the detected spans.

    Returns:
        A 404 JSON error when the paste is unknown or expired; otherwise the
        paste's metadata, counters and redacted text. Unlike the HTML view,
        this endpoint does not increment the view/reveal counters.
    """
    p = _store_get(pid)
    if p is None:
        return JSONResponse({"error": "not found or expired"}, status_code=404)
    # compare_digest() raises TypeError for non-ASCII str input; compare the
    # UTF-8 bytes so a malformed token is treated as a mismatch, not a 500.
    revealed = bool(token) and secrets.compare_digest(
        token.encode("utf-8"), p.reveal_token.encode("utf-8")
    )
    payload = {
        "id": p.id,
        "created_at": p.created_at,
        "expires_at": p.expires_at,
        "stats": p.stats,
        "views": p.views,
        "reveals": p.reveals,
        "redacted": p.redacted,
    }
    if revealed:
        # Sensitive fields are added only after the token check succeeds.
        payload["original"] = p.original
        payload["spans"] = p.spans
    return JSONResponse(payload)
# ── HTML rendering ────────────────────────────────────────────────
def _escape(text: str) -> str:
return html.escape(text, quote=False)
def _highlight_html(text: str, spans: list[dict]) -> str:
    """Build the reveal-page HTML: the original text with every span marked.

    Spans are processed in start order. A span that begins before the end
    of the previously emitted one, or that is empty/inverted, is skipped.
    Text outside spans is escaped and emitted as-is; each span becomes a
    ``<mark>`` carrying its category color and a small label tag.
    """
    chunks: list[str] = []
    pos = 0
    ordered = sorted(spans, key=lambda s: s["start"])
    for span in ordered:
        start, end = span["start"], span["end"]
        overlaps_previous = start < pos
        is_empty = end <= start
        if overlaps_previous or is_empty:
            continue
        if start > pos:
            chunks.append(_escape(text[pos:start]))

        raw_label = span["label"]
        meta = CATEGORIES_META.get(raw_label)
        if meta:
            color, label = meta["color"], meta["label"]
        else:
            # Unknown category: neutral color, show the raw label.
            color, label = "#333", raw_label

        safe_label = _escape(label)
        opening = (
            f'<mark class="pp-hi" data-cat="{_escape(raw_label)}" '
            f'style="--cat:{color}" title="{safe_label}">'
        )
        tag = f'<span class="pp-hi-tag">{safe_label}</span>'
        chunks.append(opening + _escape(text[start:end]) + tag + '</mark>')
        pos = end

    tail = text[pos:]
    if tail:
        chunks.append(_escape(tail))
    return "".join(chunks)
def _redacted_html(redacted: str) -> str:
    """Render redacted text as HTML, turning ``<CATEGORY>`` placeholders into pills.

    A placeholder is a ``<...>`` run with no ``<`` or ``>`` inside it whose
    lower-cased content is a key of CATEGORIES_META. Everything else,
    including stray angle brackets and unknown tags, is HTML-escaped and
    emitted as ordinary text.

    Scanning with a pattern that cannot cross another ``<`` means a stray
    ``<`` earlier in the text (e.g. ``a < b``) no longer swallows a later
    real placeholder, and the scan stays linear on bracket-heavy input.
    """
    import re  # local import: the module does not import re at top level

    out: list[str] = []
    cursor = 0
    for match in re.finditer(r"<([^<>]*)>", redacted):
        cat_key = match.group(1).lower()
        meta = CATEGORIES_META.get(cat_key)
        if meta is None:
            # Not a known placeholder: leave it in the pending text run so it
            # is escaped together with its surroundings.
            continue
        out.append(_escape(redacted[cursor:match.start()]))
        out.append(
            f'<span class="pp-red" data-cat="{_escape(cat_key)}" '
            f'style="--cat:{meta["color"]}">'
            f'<span class="pp-red-dot"></span>{_escape(meta["label"])}'
            f'</span>'
        )
        cursor = match.end()
    out.append(_escape(redacted[cursor:]))
    return "".join(out)
def _format_expiry(paste: Paste) -> str:
if paste.expires_at is None:
return "does not expire"
remaining = paste.expires_at - time.time()
if remaining <= 0:
return "expired"
if remaining < 3600:
return f"expires in {int(remaining // 60)} min"
if remaining < 86400:
return f"expires in {int(remaining // 3600)} h"
return f"expires in {int(remaining // 86400)} d"
def _render_view(p: Paste, revealed: bool) -> str:
    """Fill the view-page template for paste *p*.

    Args:
        p: The paste to render.
        revealed: True to show the original text with highlighted spans,
            False to show the redacted text with placeholder pills.

    Returns:
        The complete HTML document as a string.

    All ``__NAME__`` placeholders are substituted in a single pass over the
    template. Substituting them one after another would re-scan values that
    were already inserted, so paste content that happened to contain a
    placeholder name (for example ``__BODY_CLASS__``) would be rewritten.
    """
    import re  # local import: only needed for the single-pass template fill

    stats = p.stats
    # One badge per detected category, most frequent first.
    badges_html = "".join(
        f'<span class="pp-badge" style="--cat:{CATEGORIES_META.get(cat, {"color": "#333"})["color"]}">'
        f'<span class="pp-badge-dot"></span>'
        f'{_escape(CATEGORIES_META.get(cat, {"label": cat})["label"])}'
        f'<span class="pp-badge-n">{info["count"]}</span>'
        f'</span>'
        for cat, info in sorted(stats["categories"].items(),
                                key=lambda kv: -kv[1]["count"])
    ) or '<span class="pp-muted">No PII detected in this paste.</span>'
    body_html = (
        _highlight_html(p.original, p.spans) if revealed
        else _redacted_html(p.redacted)
    )
    mode_banner = (
        '<div class="pp-banner pp-banner-reveal">'
        '<strong>Private reveal.</strong> This URL contains the reveal token β€” '
        'treat it like a password. Anyone with it sees the original text.'
        '</div>'
        if revealed else
        '<div class="pp-banner pp-banner-safe">'
        '<strong>Redacted view.</strong> Sensitive spans were stripped before '
        'this page was served. The original is only visible via the author\'s reveal link.'
        '</div>'
    )
    view_mode_label = "Original (revealed)" if revealed else "Redacted"
    replacements = {
        "__PID__": _escape(p.id),
        "__MODE__": _escape(view_mode_label),
        "__EXPIRY__": _escape(_format_expiry(p)),
        "__CREATED__": _escape(time.strftime(
            "%Y-%m-%d %H:%M UTC", time.gmtime(p.created_at))),
        "__VIEWS__": str(p.views),
        "__REVEALS__": str(p.reveals),
        "__PCT__": str(stats["pii_percentage"]),
        "__SPANS_N__": str(stats["total_spans"]),
        "__CHARS_N__": f'{stats["total_chars"]:,}',
        "__BADGES__": badges_html,
        "__BANNER__": mode_banner,
        "__BODY__": body_html,
        "__BODY_CLASS__": "pp-body-reveal" if revealed else "pp-body-redacted",
    }
    # Longest keys first so no key can be shadowed by a shorter alternative.
    keys = sorted(replacements, key=len, reverse=True)
    pattern = re.compile("|".join(re.escape(k) for k in keys))
    return pattern.sub(lambda m: replacements[m.group(0)], _VIEW_HTML)
def _not_found_html(pid: str) -> str:
    """Return the 404 page with the (escaped) requested paste id filled in."""
    safe_pid = _escape(pid)
    return _NOT_FOUND_HTML.replace("{{PID}}", safe_pid)
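# ── compose page (paste editor) ───────────────────────────────────
# The templates below are raw string literals assembled by concatenation at
# import time: _COMPOSE_HTML splices in _SHARED_CSS, _CATEGORIES_JSON and
# MAX_PASTE_CHARS, so the page reflects the values those had at import.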
# CATEGORIES_META serialized once as JSON so it can be inlined into the
# compose page's script as the `CATS` constant.
_CATEGORIES_JSON = json.dumps(CATEGORIES_META)
_SHARED_CSS = r"""
:root{
--bg: #f7f7f8;
--panel: #ffffff;
--panel-2: #f1f1f3;
--ink: #0a0a0a;
--ink-dim: #3f3f46;
--ink-faint: #70707a;
--line: #e4e4e7;
--line-strong: #d4d4d8;
--accent: #0f8a5f;
--accent-ink: #ffffff;
--warn: #b45309;
--primary-bg: #18181b;
--primary-fg: #ffffff;
--radius-lg: 12px;
--radius-md: 8px;
--radius-sm: 5px;
--shadow-xs: 0 1px 1.5px rgba(10,10,10,.04);
--shadow-sm: 0 1px 3px rgba(10,10,10,.06), 0 1px 2px rgba(10,10,10,.04);
--shadow-md: 0 4px 14px rgba(10,10,10,.07), 0 1px 3px rgba(10,10,10,.04);
--font-sans: 'Inter', system-ui, -apple-system, 'Segoe UI', sans-serif;
--font-mono: 'JetBrains Mono', ui-monospace, SFMono-Regular, Menlo, Consolas, monospace;
--font-serif: 'Instrument Serif', 'Source Serif 4', Georgia, serif;
}
@media (prefers-color-scheme: dark){
:root{
--bg: #0e0e11;
--panel: #18181c;
--panel-2: #1f1f24;
--ink: #e8e8ea;
--ink-dim: #a8a8ae;
--ink-faint: #70707a;
--line: rgba(255,255,255,0.08);
--line-strong: rgba(255,255,255,0.18);
--accent: #2bb77e;
--accent-ink: #0e0e11;
--warn: #eab308;
--primary-bg: #f0f0f2;
--primary-fg: #0e0e11;
--shadow-xs: none;
--shadow-sm: none;
--shadow-md: none;
}
}
*,*::before,*::after{box-sizing:border-box;margin:0;padding:0}
html,body{height:100%}
body{
font-family:var(--font-sans);
background:var(--bg);
color:var(--ink);
font-size:14px;line-height:1.55;
-webkit-font-smoothing:antialiased;
font-feature-settings:"cv11","ss01";
}
a{color:inherit;text-decoration:underline;text-decoration-color:var(--line-strong);text-underline-offset:3px}
a:hover{text-decoration-color:var(--ink)}
button{font:inherit;color:inherit;background:transparent;border:0;cursor:pointer}
.pp-shell{max-width:1060px;margin:0 auto;padding:36px 20px 56px}
.pp-brand{display:flex;align-items:center;gap:10px;margin-bottom:22px}
.pp-brand-mark{
width:26px;height:26px;border-radius:7px;
background:var(--ink);color:var(--bg);
display:grid;place-items:center;
font-family:var(--font-mono);font-size:13px;font-weight:600;letter-spacing:-0.02em;
}
.pp-brand-name{font-size:13.5px;font-weight:500}
.pp-brand-name .sub{color:var(--ink-faint);font-weight:400;margin-left:6px}
.pp-caps{font-size:10.5px;font-weight:600;letter-spacing:0.09em;text-transform:uppercase;color:var(--ink-dim)}
.pp-hero{margin-bottom:22px}
.pp-hero h1{font-family:var(--font-serif);font-size:38px;line-height:1.08;letter-spacing:-0.015em;font-weight:500;margin-bottom:8px}
.pp-hero p{color:var(--ink-dim);max-width:58ch;font-size:14px}
.pp-banner{padding:10px 14px;border-radius:var(--radius-md);font-size:13px;line-height:1.5;border:0.5px solid var(--line-strong);margin-bottom:16px}
.pp-banner strong{font-weight:600}
.pp-banner-safe{background:color-mix(in srgb, var(--accent) 8%, transparent);border-color:color-mix(in srgb, var(--accent) 26%, var(--line-strong))}
.pp-banner-reveal{background:color-mix(in srgb, var(--warn) 10%, transparent);border-color:color-mix(in srgb, var(--warn) 30%, var(--line-strong))}
"""
_COMPOSE_HTML = r"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<title>DLP Paste-Proxy β€” Pastebin with a conscience</title>
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500&family=Instrument+Serif:ital@0;1&display=swap" rel="stylesheet">
<style>
""" + _SHARED_CSS + r"""
.pp-card{
background:var(--panel);
border:0.5px solid var(--line);
border-radius:var(--radius-lg);
box-shadow:var(--shadow-md);
overflow:hidden;
}
.pp-card-head{
padding:14px 18px;
border-bottom:0.5px solid var(--line);
display:flex;align-items:center;gap:10px;flex-wrap:wrap;
}
.pp-card-head h2{font-size:13.5px;font-weight:500;letter-spacing:-0.005em}
.pp-spacer{flex:1}
.pp-grid{
display:grid;
grid-template-columns:minmax(0,1fr) 280px;
gap:0;
}
.pp-pane{padding:18px 20px 22px}
.pp-pane + .pp-pane{border-left:0.5px solid var(--line);background:var(--panel-2)}
.pp-textarea{
width:100%;min-height:320px;
font-family:var(--font-mono);font-size:13px;line-height:1.55;
color:var(--ink);background:transparent;
border:1px solid var(--line);border-radius:var(--radius-md);
padding:14px 14px;resize:vertical;
transition:border-color .15s,background .15s;
}
.pp-textarea::placeholder{color:var(--ink-faint)}
.pp-textarea:focus{outline:none;border-color:var(--line-strong);background:color-mix(in srgb, var(--ink) 1.5%, transparent)}
.pp-sub{color:var(--ink-faint);font-size:11.5px;font-family:var(--font-mono);margin-top:8px;display:flex;align-items:center;gap:10px;flex-wrap:wrap}
.pp-sub .sep{opacity:.4}
.pp-label{display:block;font-size:11px;font-weight:600;letter-spacing:0.07em;text-transform:uppercase;color:var(--ink-dim);margin:0 0 8px}
.pp-ttl{display:flex;gap:4px;background:var(--panel);border:0.5px solid var(--line);padding:3px;border-radius:var(--radius-md)}
.pp-ttl button{
flex:1;padding:7px 0;font-size:12px;font-weight:500;color:var(--ink-dim);
border-radius:5px;transition:background .12s,color .12s;
}
.pp-ttl button[aria-pressed="true"]{background:var(--ink);color:var(--bg)}
.pp-ttl button:hover:not([aria-pressed="true"]){background:color-mix(in srgb, var(--ink) 4%, transparent);color:var(--ink)}
.pp-hint{font-size:12px;color:var(--ink-faint);margin-top:8px;line-height:1.45}
.pp-btn{
font-size:13px;font-weight:500;padding:10px 14px;
border:0.5px solid var(--line-strong);
border-radius:var(--radius-md);
background:var(--panel);color:var(--ink);
display:inline-flex;align-items:center;justify-content:center;gap:8px;
transition:background .12s,border-color .12s;
}
.pp-btn:hover:not(:disabled){background:color-mix(in srgb, var(--ink) 4%, var(--panel));border-color:var(--ink-dim)}
.pp-btn:disabled{opacity:.55;cursor:not-allowed}
.pp-btn-primary{background:var(--primary-bg);color:var(--primary-fg);border-color:var(--primary-bg);width:100%}
.pp-btn-primary:hover:not(:disabled){background:color-mix(in srgb, var(--primary-bg) 88%, var(--ink));border-color:var(--primary-bg)}
.pp-btn-arr{font-family:var(--font-mono);font-size:11px;opacity:.7}
.pp-success{
display:none;margin-top:24px;padding:22px 22px 24px;
background:var(--panel);border:0.5px solid var(--line);border-radius:var(--radius-lg);box-shadow:var(--shadow-md);
}
.pp-success.on{display:block}
.pp-success h3{font-family:var(--font-serif);font-size:22px;line-height:1.15;font-weight:500;margin-bottom:4px;letter-spacing:-0.01em}
.pp-success .pp-caps{margin-bottom:14px;display:block}
.pp-link{
display:flex;align-items:stretch;gap:0;margin:8px 0 14px;
border:0.5px solid var(--line);border-radius:var(--radius-md);overflow:hidden;background:var(--panel-2);
}
.pp-link input{
flex:1;border:0;background:transparent;padding:10px 12px;
font-family:var(--font-mono);font-size:12px;color:var(--ink);min-width:0;outline:none;
}
.pp-link button{
border-left:0.5px solid var(--line);background:var(--panel);
padding:0 14px;font-size:12px;font-weight:500;color:var(--ink-dim);
transition:background .12s,color .12s;
}
.pp-link button:hover{background:color-mix(in srgb, var(--ink) 4%, var(--panel));color:var(--ink)}
.pp-link-label{display:flex;align-items:baseline;gap:8px;font-size:13px;font-weight:500;margin-top:14px}
.pp-link-label .hint{font-weight:400;color:var(--ink-faint);font-size:12px}
.pp-link-label:first-of-type{margin-top:0}
.pp-link-label .priv{
font-family:var(--font-mono);font-size:10px;font-weight:600;letter-spacing:.06em;
padding:2px 7px;border-radius:4px;
background:color-mix(in srgb, var(--warn) 18%, transparent);
color:color-mix(in srgb, var(--warn) 70%, var(--ink));
text-transform:uppercase;
}
.pp-preview-row{display:grid;grid-template-columns:1fr 1fr;gap:12px;margin-top:16px}
.pp-preview{background:var(--panel-2);border:0.5px solid var(--line);border-radius:var(--radius-md);padding:12px 14px 14px;font-family:var(--font-serif);font-size:14.5px;line-height:1.55;min-height:130px;max-height:260px;overflow:auto}
.pp-preview .pp-caps{display:block;margin-bottom:8px;font-family:var(--font-sans);font-size:10px;color:var(--ink-faint)}
.pp-err{display:none;margin-top:12px;padding:10px 12px;border-radius:var(--radius-md);background:color-mix(in srgb, #dc2626 9%, transparent);border:0.5px solid color-mix(in srgb, #dc2626 30%, var(--line-strong));color:#991b1b;font-size:13px}
.pp-err.on{display:block}
.pp-err code{font-family:var(--font-mono);font-size:12px}
.pp-loading{display:none;align-items:center;gap:8px;color:var(--ink-dim);font-size:13px;margin-top:12px}
.pp-loading.on{display:inline-flex}
.pp-spin{width:12px;height:12px;border:1.5px solid color-mix(in srgb, var(--ink) 25%, transparent);border-top-color:var(--ink);border-radius:50%;animation:pp-spin 0.8s linear infinite}
@keyframes pp-spin{to{transform:rotate(360deg)}}
.pp-footer{
margin-top:28px;padding-top:22px;border-top:0.5px solid var(--line);
display:flex;justify-content:space-between;gap:16px;color:var(--ink-faint);font-size:12px;flex-wrap:wrap;
}
.pp-footer a{color:var(--ink-dim)}
/* Pills & highlights used on view page (scoped so compose page can
reuse the preview rendering to show what the redacted version
looks like before the user commits) */
.pp-red{
display:inline-flex;align-items:center;gap:4px;
font-family:var(--font-sans);font-size:12px;font-weight:500;
padding:1px 7px 1px 6px;margin:0 1px;border-radius:3px;
background:color-mix(in srgb, var(--cat, #666) 14%, transparent);
color:color-mix(in srgb, var(--cat, #666) 62%, var(--ink));
vertical-align:baseline;letter-spacing:-0.002em;
border:0.5px solid color-mix(in srgb, var(--cat, #666) 28%, transparent);
}
.pp-red-dot{width:5px;height:5px;border-radius:50%;background:var(--cat,#666);flex:none}
@media (max-width:820px){
.pp-grid{grid-template-columns:1fr}
.pp-pane + .pp-pane{border-left:0;border-top:0.5px solid var(--line)}
.pp-preview-row{grid-template-columns:1fr}
}
</style>
</head>
<body>
<div class="pp-shell">
<div class="pp-brand">
<div class="pp-brand-mark">P</div>
<div class="pp-brand-name">DLP Paste-Proxy<span class="sub">pastebin with a conscience</span></div>
</div>
<div class="pp-hero">
<h1>Paste sensitive text.<br>Share only the redacted view.</h1>
<p>OpenAI Privacy Filter scans your paste for names, addresses, emails, phones, URLs, dates, account numbers, and secrets before minting a shareable link. Viewers see placeholders; only your private reveal link shows the original.</p>
</div>
<div class="pp-card">
<div class="pp-card-head">
<span class="pp-caps">Compose</span>
<h2>New paste</h2>
<span class="pp-spacer"></span>
<span class="pp-sub" id="pp-char-count">0 / """ + f"{MAX_PASTE_CHARS:,}" + r""" chars</span>
</div>
<div class="pp-grid">
<div class="pp-pane">
<label class="pp-label" for="pp-text">Paste body</label>
<textarea id="pp-text" class="pp-textarea" spellcheck="false"
placeholder="Paste anything β€” a DM thread, a log line, an email, a support ticket. The OPF model labels each character span; placeholders replace the private parts before the URL is minted."></textarea>
<div class="pp-sub">
<span id="pp-cursor">line 1, col 1</span>
<span class="sep">Β·</span>
<span>no data leaves this server except as redacted placeholders</span>
</div>
</div>
<div class="pp-pane">
<label class="pp-label">Auto-expiry</label>
<div class="pp-ttl" id="pp-ttl" role="tablist" aria-label="Expiration">
<button type="button" data-ttl="never" aria-pressed="true">Never</button>
<button type="button" data-ttl="1h" aria-pressed="false">1h</button>
<button type="button" data-ttl="24h" aria-pressed="false">24h</button>
<button type="button" data-ttl="7d" aria-pressed="false">7d</button>
</div>
<p class="pp-hint">A background sweeper deletes expired pastes on the server. Expired links 404.</p>
<label class="pp-label" style="margin-top:20px">Create</label>
<button type="button" id="pp-create" class="pp-btn pp-btn-primary">
<span>Scan & mint link</span>
<span class="pp-btn-arr">↡</span>
</button>
<div class="pp-loading" id="pp-loading">
<span class="pp-spin"></span><span>Running OPF on your paste…</span>
</div>
<div class="pp-err" id="pp-err"></div>
</div>
</div>
</div>
<section class="pp-success" id="pp-success">
<span class="pp-caps">Paste minted</span>
<h3>Your paste is ready.</h3>
<div class="pp-link-label">
Shareable view link
<span class="hint">redacted β€” give to recipients</span>
</div>
<div class="pp-link">
<input id="pp-view-url" readonly value="">
<button type="button" data-copy="pp-view-url">Copy</button>
</div>
<div class="pp-link-label">
Private reveal link
<span class="priv">author only</span>
<span class="hint">shows original β€” keep it to yourself</span>
</div>
<div class="pp-link">
<input id="pp-reveal-url" readonly value="">
<button type="button" data-copy="pp-reveal-url">Copy</button>
</div>
<div class="pp-preview-row">
<div class="pp-preview">
<span class="pp-caps">What recipients will see</span>
<div id="pp-preview-redacted"></div>
</div>
<div class="pp-preview" style="font-family:var(--font-sans);font-size:12.5px;line-height:1.5">
<span class="pp-caps">Summary</span>
<div id="pp-preview-summary"></div>
</div>
</div>
</section>
<footer class="pp-footer">
<div>Powered by <a href="https://huggingface.co/openai/privacy-filter" target="_blank" rel="noopener">OpenAI Privacy Filter</a> Β· 1.5B params, 50M active, 128k context</div>
<div><a href="#" id="pp-about">How this works β†’</a></div>
</footer>
</div>
<script type="module">
// ══════════════════════════════════════════════════════════════════
// Gradio JS client β€” hits the queued @server.api create_paste route
// so the OPF model call is serialized through Gradio's queue and
// plays nicely with @spaces.GPU on ZeroGPU.
// ══════════════════════════════════════════════════════════════════
import { Client } from "https://cdn.jsdelivr.net/npm/@gradio/client/dist/index.min.js";
const clientPromise = Client.connect(window.location.origin);
const CATS = """ + _CATEGORIES_JSON + r""";
const MAX = """ + str(MAX_PASTE_CHARS) + r""";
const $text = document.getElementById('pp-text');
const $cc = document.getElementById('pp-char-count');
const $cur = document.getElementById('pp-cursor');
const $ttl = document.getElementById('pp-ttl');
const $btn = document.getElementById('pp-create');
const $load = document.getElementById('pp-loading');
const $err = document.getElementById('pp-err');
const $ok = document.getElementById('pp-success');
function updateCount(){
const n = $text.value.length;
$cc.textContent = n.toLocaleString() + ' / ' + MAX.toLocaleString() + ' chars';
$cc.style.color = n > MAX ? '#b45309' : '';
}
function updateCursor(){
const pos = $text.selectionStart;
const lines = $text.value.slice(0, pos).split('\n');
$cur.textContent = 'line ' + lines.length + ', col ' + (lines[lines.length-1].length + 1);
}
$text.addEventListener('input', updateCount);
['keyup','click','focus','mouseup'].forEach(e => $text.addEventListener(e, updateCursor));
let ttl = 'never';
$ttl.addEventListener('click', (e) => {
const b = e.target.closest('button'); if (!b) return;
[...$ttl.querySelectorAll('button')].forEach(x => x.setAttribute('aria-pressed', x === b ? 'true' : 'false'));
ttl = b.dataset.ttl;
});
function renderRedacted(redacted){
let html = '';
let i = 0;
while (i < redacted.length){
const lt = redacted.indexOf('<', i);
if (lt === -1){ html += escapeHtml(redacted.slice(i)); break; }
html += escapeHtml(redacted.slice(i, lt));
const gt = redacted.indexOf('>', lt + 1);
if (gt === -1){ html += escapeHtml(redacted.slice(lt)); break; }
const tag = redacted.slice(lt+1, gt);
const key = tag.toLowerCase();
const meta = CATS[key];
if (!meta){ html += escapeHtml(redacted.slice(lt, gt+1)); }
else {
html += '<span class="pp-red" data-cat="'+escapeHtml(key)+'" style="--cat:'+meta.color+'">'+
'<span class="pp-red-dot"></span>'+escapeHtml(meta.label)+'</span>';
}
i = gt + 1;
}
return html;
}
function escapeHtml(s){ return s.replace(/[&<>"']/g, c => ({'&':'&amp;','<':'&lt;','>':'&gt;','"':'&quot;',"'":'&#39;'}[c])); }
async function createPaste(){
const text = $text.value.trim();
$err.classList.remove('on'); $err.textContent = '';
if (!text){ $err.classList.add('on'); $err.textContent = 'Paste is empty.'; return; }
if (text.length > MAX){ $err.classList.add('on'); $err.textContent = 'Paste exceeds ' + MAX.toLocaleString() + ' characters.'; return; }
$btn.disabled = true; $load.classList.add('on'); $ok.classList.remove('on');
try{
const client = await clientPromise;
const result = await client.predict("/create_paste", { text, ttl });
const data = result.data[0] || {};
if (data.error) throw new Error(data.error);
const origin = window.location.origin;
document.getElementById('pp-view-url').value = origin + data.view_path;
document.getElementById('pp-reveal-url').value = origin + data.reveal_path;
// create_paste already returns the redacted preview in-line, so
// no second round-trip to /api/paste/{id} is needed here.
document.getElementById('pp-preview-redacted').innerHTML = renderRedacted(data.redacted || '');
const s = data.stats;
const cats = Object.entries(s.categories).sort((a,b) => b[1].count - a[1].count);
const catHtml = cats.length
? cats.map(([k,v]) => {
const m = CATS[k] || {label:k, color:'#333'};
return '<span class="pp-red" style="--cat:'+m.color+';margin:2px 4px 2px 0"><span class="pp-red-dot"></span>'+escapeHtml(m.label)+' Γ— '+v.count+'</span>';
}).join('')
: '<em style="color:var(--ink-faint)">No PII found in this paste.</em>';
document.getElementById('pp-preview-summary').innerHTML =
'<div style="display:flex;gap:18px;margin-bottom:10px;align-items:baseline"><div><div style="font-family:var(--font-serif);font-size:26px;letter-spacing:-0.02em;line-height:1">'+s.pii_percentage+'%</div><div class="pp-caps" style="margin-top:3px">PII density</div></div>'+
'<div><div style="font-family:var(--font-serif);font-size:26px;letter-spacing:-0.02em;line-height:1">'+s.total_spans+'</div><div class="pp-caps" style="margin-top:3px">spans</div></div>'+
'<div><div style="font-family:var(--font-serif);font-size:26px;letter-spacing:-0.02em;line-height:1">'+s.total_chars.toLocaleString()+'</div><div class="pp-caps" style="margin-top:3px">chars</div></div></div>'+
'<div>'+catHtml+'</div>';
$ok.classList.add('on');
$ok.scrollIntoView({behavior:'smooth', block:'start'});
} catch (e) {
$err.classList.add('on');
$err.textContent = e.message || 'Failed to create paste.';
} finally {
$btn.disabled = false; $load.classList.remove('on');
}
}
$btn.addEventListener('click', createPaste);
$text.addEventListener('keydown', (e) => {
if ((e.metaKey || e.ctrlKey) && e.key === 'Enter'){ e.preventDefault(); createPaste(); }
});
document.addEventListener('click', (e) => {
const b = e.target.closest('[data-copy]'); if (!b) return;
const inp = document.getElementById(b.dataset.copy);
inp.select(); navigator.clipboard.writeText(inp.value);
const prev = b.textContent; b.textContent = 'Copied'; setTimeout(() => b.textContent = prev, 1200);
});
updateCount(); updateCursor();
</script>
</body>
</html>
"""
# ── view page ─────────────────────────────────────────────────────
# HTML template for the paste view page. It embeds the shared stylesheet
# (_SHARED_CSS) followed by view-specific rules, and contains the following
# literal placeholders that must be substituted before the page is served:
#   __PID__         paste id (appears in <title> and in the header chip)
#   __MODE__        label shown in the header mode chip
#   __CREATED__     creation text, shown before the "·" separator
#   __EXPIRY__      expiry text, shown after the "·" separator
#   __PCT__         PII density number (the template appends the "%")
#   __SPANS_N__     span count
#   __CHARS_N__     character count
#   __BANNER__      optional markup inserted between header and body
#   __BADGES__      markup placed inside the .pp-badges row
#   __BODY_CLASS__  CSS class of the body container; presumably one of
#                   pp-body-redacted / pp-body-reveal (both styled below)
#   __BODY__        the paste body markup
#   __VIEWS__       view counter shown in the footer
#   __REVEALS__     reveal counter shown in the footer
# NOTE(review): the template inserts every placeholder verbatim, so the code
# that fills it is responsible for HTML-escaping any user-derived value
# (presumably done by the /view handler; verify against the caller).
# The separators and arrow below are real UTF-8 characters (—, ·, →); the page
# declares <meta charset="UTF-8">, so the file must be saved and served as
# UTF-8 for them to render correctly.
_VIEW_HTML = r"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<title>Paste __PID__ — DLP Paste-Proxy</title>
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500&family=Instrument+Serif:ital@0;1&display=swap" rel="stylesheet">
<style>
""" + _SHARED_CSS + r"""
.pp-view-head{
display:flex;align-items:center;gap:10px;flex-wrap:wrap;margin-bottom:18px;
padding:14px 16px;background:var(--panel);border:0.5px solid var(--line);
border-radius:var(--radius-lg);box-shadow:var(--shadow-sm);
}
.pp-view-id{font-family:var(--font-mono);font-size:12.5px;color:var(--ink-dim);padding:3px 8px;background:var(--panel-2);border:0.5px solid var(--line);border-radius:5px}
.pp-view-mode{font-size:11px;font-weight:600;letter-spacing:0.06em;text-transform:uppercase;padding:3px 8px;border-radius:4px;background:color-mix(in srgb,var(--ink) 8%,transparent);color:var(--ink-dim)}
.pp-view-expiry{font-family:var(--font-mono);font-size:11.5px;color:var(--ink-faint)}
.pp-stat-row{display:flex;gap:26px;flex-wrap:wrap;margin-left:auto;margin-right:0}
.pp-stat{text-align:right}
.pp-stat b{font-family:var(--font-serif);font-weight:500;font-size:22px;letter-spacing:-0.01em;display:block;line-height:1}
.pp-stat span{font-size:10.5px;letter-spacing:0.08em;text-transform:uppercase;color:var(--ink-faint);font-weight:500}
.pp-view-body{
background:var(--panel);border:0.5px solid var(--line);border-radius:var(--radius-lg);
box-shadow:var(--shadow-md);padding:28px 32px 30px;
}
.pp-body-redacted, .pp-body-reveal{
font-family:var(--font-serif);font-size:17px;line-height:1.7;
color:var(--ink);
white-space:pre-wrap;word-wrap:break-word;
}
/* highlight (reveal mode) */
.pp-hi{
background:color-mix(in srgb, var(--cat,#666) 18%, transparent);
color:var(--ink);
border-radius:3px;padding:1px 3px;margin:0 1px;
border:0.5px solid color-mix(in srgb, var(--cat,#666) 30%, transparent);
position:relative;
}
.pp-hi-tag{
font-family:var(--font-sans);font-size:9.5px;letter-spacing:0.07em;text-transform:uppercase;
font-weight:600;color:var(--cat,#666);margin-left:4px;opacity:.72;
}
.pp-badges{display:flex;gap:8px;flex-wrap:wrap;margin-bottom:18px;padding-bottom:16px;border-bottom:0.5px solid var(--line)}
.pp-badge{
display:inline-flex;align-items:center;gap:6px;
font-size:12px;font-weight:500;padding:4px 9px 4px 8px;
border-radius:4px;background:color-mix(in srgb, var(--cat,#666) 10%, transparent);
border:0.5px solid color-mix(in srgb, var(--cat,#666) 22%, transparent);
color:var(--ink-dim);
}
.pp-badge-dot{width:6px;height:6px;border-radius:50%;background:var(--cat,#666)}
.pp-badge-n{font-family:var(--font-mono);font-size:11px;color:var(--cat,#666);font-weight:600;margin-left:2px}
.pp-muted{color:var(--ink-faint);font-size:13px}
.pp-actions{display:flex;gap:10px;margin-top:16px;flex-wrap:wrap}
.pp-btn{
font-size:12.5px;font-weight:500;padding:8px 14px;
border:0.5px solid var(--line-strong);border-radius:var(--radius-md);
background:var(--panel);color:var(--ink);display:inline-flex;align-items:center;gap:8px;
transition:background .12s,border-color .12s;
}
.pp-btn:hover{background:color-mix(in srgb, var(--ink) 4%, var(--panel));border-color:var(--ink-dim)}
.pp-footer{margin-top:28px;padding-top:22px;border-top:0.5px solid var(--line);display:flex;justify-content:space-between;gap:16px;color:var(--ink-faint);font-size:12px;flex-wrap:wrap}
</style>
</head>
<body>
<div class="pp-shell">
<div class="pp-brand">
<a href="/" style="text-decoration:none;display:flex;align-items:center;gap:10px">
<div class="pp-brand-mark">P</div>
<div class="pp-brand-name">DLP Paste-Proxy<span class="sub">pastebin with a conscience</span></div>
</a>
</div>
<div class="pp-view-head">
<span class="pp-caps">Paste</span>
<span class="pp-view-id">__PID__</span>
<span class="pp-view-mode">__MODE__</span>
<span class="pp-view-expiry">__CREATED__ · __EXPIRY__</span>
<div class="pp-stat-row">
<div class="pp-stat"><b>__PCT__%</b><span>PII density</span></div>
<div class="pp-stat"><b>__SPANS_N__</b><span>spans</span></div>
<div class="pp-stat"><b>__CHARS_N__</b><span>chars</span></div>
</div>
</div>
__BANNER__
<div class="pp-view-body">
<div class="pp-badges">__BADGES__</div>
<div class="__BODY_CLASS__">__BODY__</div>
<div class="pp-actions">
<button type="button" class="pp-btn" onclick="navigator.clipboard.writeText(window.location.href); this.textContent='Copied this link'">Copy this link</button>
<a class="pp-btn" href="/">Create your own paste →</a>
</div>
</div>
<footer class="pp-footer">
<div>Recipients see placeholders. The author's reveal link shows the original inline.</div>
<div>Views: __VIEWS__ · Reveals: __REVEALS__</div>
</footer>
</div>
</body>
</html>
"""
# HTML page shown when a paste id cannot be resolved. It embeds the shared
# stylesheet (_SHARED_CSS) followed by rules for the .pp-404 card, and
# contains a single literal placeholder, {{PID}}, inside a <code> element.
# NOTE(review): {{PID}} is inserted verbatim and presumably originates from
# the request URL, so the code that fills it must HTML-escape the value
# (verify against the caller).
# The title dash and button arrow are real UTF-8 characters (—, →); the page
# declares <meta charset="UTF-8">, so the file must be saved and served as
# UTF-8 for them to render correctly.
_NOT_FOUND_HTML = r"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<title>Paste not found — DLP Paste-Proxy</title>
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500&family=Instrument+Serif:ital@0;1&display=swap" rel="stylesheet">
<style>
""" + _SHARED_CSS + r"""
.pp-404{
background:var(--panel);border:0.5px solid var(--line);border-radius:var(--radius-lg);
box-shadow:var(--shadow-md);padding:56px 40px;text-align:center;
}
.pp-404 h1{font-family:var(--font-serif);font-size:48px;font-weight:500;letter-spacing:-0.02em;line-height:1;margin-bottom:10px}
.pp-404 p{color:var(--ink-dim);margin-bottom:22px;max-width:44ch;margin-left:auto;margin-right:auto}
.pp-404 code{font-family:var(--font-mono);font-size:12.5px;background:var(--panel-2);padding:2px 8px;border-radius:4px}
.pp-btn{font-size:13px;font-weight:500;padding:10px 16px;border:0.5px solid var(--line-strong);border-radius:var(--radius-md);background:var(--primary-bg);color:var(--primary-fg);display:inline-flex;align-items:center;gap:8px}
</style>
</head>
<body>
<div class="pp-shell">
<div class="pp-brand">
<a href="/" style="text-decoration:none;display:flex;align-items:center;gap:10px">
<div class="pp-brand-mark">P</div>
<div class="pp-brand-name">DLP Paste-Proxy<span class="sub">pastebin with a conscience</span></div>
</a>
</div>
<div class="pp-404">
<h1>Paste not found</h1>
<p><code>{{PID}}</code> either never existed, expired by its TTL, or was evicted by a server restart. Pastes live in process memory for the demo.</p>
<a class="pp-btn" href="/">Create a new paste →</a>
</div>
</div>
</body>
</html>
"""
# ── launch ────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Script entry point: start the server only when this file is executed
    # directly, never as a side effect of importing the module.
    # "0.0.0.0" asks the server to listen on every network interface rather
    # than loopback only; 7860 is presumably the port the hosting platform
    # expects (confirm against the deployment configuration).
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    server.launch(**launch_options)