ysharma HF Staff committed on
Commit
01719f8
·
verified ·
1 Parent(s): 47b2347

Update app_v6.py

Files changed (1)
  1. app_v6.py +154 -619
app_v6.py CHANGED
@@ -1,40 +1,42 @@
  """
- ============================================
- PII Explorer - Document Privacy Explorer / Playground
- ============================================
  """

  # ── stdlib ───────────────────────────────────────────────────────
- import dataclasses
  import functools
  import io
  import json
- import math
  import os
  import re
  import tempfile
  import time
- from bisect import bisect_left, bisect_right
- from collections.abc import Sequence
- from dataclasses import dataclass
  from pathlib import Path
- from typing import Final

  # ── third-party ──────────────────────────────────────────────────
  import gradio as gr
  import spaces
- import tiktoken
  import torch
- import torch.nn.functional as F
- from fastapi import File, Form, UploadFile
- from fastapi.responses import HTMLResponse, JSONResponse, StreamingResponse
- from huggingface_hub import snapshot_download
- from safetensors import safe_open

  # ── configuration ────────────────────────────────────────────────
- MODEL_REPO = os.getenv("MODEL_ID", "charles-first-org/second-model")
  HF_TOKEN = os.getenv("HF_TOKEN", None)
- MODEL_DIR = Path(snapshot_download(MODEL_REPO, token=HF_TOKEN))

  CATEGORIES_META = {
      "private_person": {"color": "#E24B4A", "cls": "hp", "label": "Person", "mono": False},
@@ -48,537 +50,41 @@ CATEGORIES_META = {
  }

  # =====================================================================
- # MODEL ARCHITECTURE + INFERENCE (unchanged from reference impl)
  # =====================================================================

- PRIVACY_FILTER_MODEL_TYPE: Final[str] = "privacy_filter"
- REQUIRED_MODEL_CONFIG_KEYS: Final[tuple[str, ...]] = (
-     "model_type", "encoding", "num_hidden_layers", "num_experts",
-     "experts_per_token", "vocab_size", "num_labels", "hidden_size",
-     "intermediate_size", "head_dim", "num_attention_heads",
-     "num_key_value_heads", "sliding_window", "bidirectional_context",
-     "bidirectional_left_context", "bidirectional_right_context",
-     "default_n_ctx", "initial_context_length", "rope_theta",
-     "rope_scaling_factor", "rope_ntk_alpha", "rope_ntk_beta", "param_dtype",
- )
- BACKGROUND_CLASS_LABEL: Final[str] = "O"
- BOUNDARY_PREFIXES: Final[tuple[str, ...]] = ("B", "I", "E", "S")
- SPAN_CLASS_NAMES: Final[tuple[str, ...]] = (
-     BACKGROUND_CLASS_LABEL,
-     "account_number", "private_address", "private_date", "private_email",
-     "private_person", "private_phone", "private_url", "secret",
- )
- NER_CLASS_NAMES: Final[tuple[str, ...]] = (BACKGROUND_CLASS_LABEL,) + tuple(
-     f"{prefix}-{base}"
-     for base in SPAN_CLASS_NAMES if base != BACKGROUND_CLASS_LABEL
-     for prefix in BOUNDARY_PREFIXES
- )
- VITERBI_TRANSITION_BIAS_KEYS: Final[tuple[str, ...]] = (
-     "transition_bias_background_stay", "transition_bias_background_to_start",
-     "transition_bias_inside_to_continue", "transition_bias_inside_to_end",
-     "transition_bias_end_to_background", "transition_bias_end_to_start",
- )
- DEFAULT_VITERBI_CALIBRATION_PRESET: Final[str] = "default"
-
-
- def validate_model_config_contract(cfg: dict, *, context: str) -> None:
-     missing = [k for k in REQUIRED_MODEL_CONFIG_KEYS if k not in cfg]
-     if missing:
-         raise ValueError(f"{context} missing keys: {', '.join(missing)}")
-     if cfg.get("model_type") != PRIVACY_FILTER_MODEL_TYPE:
-         raise ValueError(f"{context} model_type must be {PRIVACY_FILTER_MODEL_TYPE!r}")
-     if cfg.get("bidirectional_context") is not True:
-         raise ValueError(f"{context} must use bidirectional_context=true")
-     lc, rc = cfg.get("bidirectional_left_context"), cfg.get("bidirectional_right_context")
-     if not isinstance(lc, int) or not isinstance(rc, int) or lc != rc or lc < 0:
-         raise ValueError(f"{context} bidirectional context must be equal non-negative ints")
-     sw = cfg.get("sliding_window")
-     if sw != 2 * lc + 1:
-         raise ValueError(f"{context} sliding_window must equal 2*context+1")
-     if cfg["num_labels"] != 33:
-         raise ValueError(f"{context} num_labels must be 33")
-     if cfg["param_dtype"] != "bfloat16":
-         raise ValueError(f"{context} param_dtype must be bfloat16")
-
-
- def expert_linear(x: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor | None) -> torch.Tensor:
-     n, e, k = x.shape
-     _, _, _, o = weight.shape
-     out = torch.bmm(x.reshape(n * e, 1, k), weight.reshape(n * e, k, o)).reshape(n, e, o)
-     return out + bias if bias is not None else out
-
-
- @dataclass
- class ModelConfig:
-     num_hidden_layers: int; num_experts: int; experts_per_token: int
-     vocab_size: int; num_labels: int; hidden_size: int; intermediate_size: int
-     head_dim: int; num_attention_heads: int; num_key_value_heads: int
-     bidirectional_context_size: int; initial_context_length: int
-     rope_theta: float; rope_scaling_factor: float; rope_ntk_alpha: float; rope_ntk_beta: float
-
-     @classmethod
-     def from_checkpoint_config(cls, cfg: dict, *, context: str) -> "ModelConfig":
-         cfg = dict(cfg)
-         cfg["bidirectional_context_size"] = cfg["bidirectional_left_context"]
-         fields = {f.name for f in dataclasses.fields(cls)}
-         return cls(**{k: v for k, v in cfg.items() if k in fields})
-
-
- class RMSNorm(torch.nn.Module):
-     def __init__(self, n: int, eps: float = 1e-5, device=None):
-         super().__init__()
-         self.eps = eps
-         self.scale = torch.nn.Parameter(torch.ones(n, device=device, dtype=torch.float32))
-
-     def forward(self, x):
-         t = x.float()
-         return (t * torch.rsqrt(t.pow(2).mean(-1, keepdim=True) + self.eps) * self.scale).to(x.dtype)
-
-
- def apply_rope(x, cos, sin):
-     cos = cos.unsqueeze(-2).to(x.dtype); sin = sin.unsqueeze(-2).to(x.dtype)
-     x1, x2 = x[..., ::2], x[..., 1::2]
-     return torch.stack((x1 * cos - x2 * sin, x2 * cos + x1 * sin), dim=-1).reshape(x.shape)
-
-
- class RotaryEmbedding(torch.nn.Module):
-     def __init__(self, head_dim, base, dtype, *, initial_context_length=4096,
-                  scaling_factor=1.0, ntk_alpha=1.0, ntk_beta=32.0, device=None):
-         super().__init__()
-         self.head_dim, self.base, self.dtype = head_dim, base, dtype
-         self.initial_context_length = initial_context_length
-         self.scaling_factor, self.ntk_alpha, self.ntk_beta = scaling_factor, ntk_alpha, ntk_beta
-         self.device = device
-         mp = max(int(initial_context_length * scaling_factor), initial_context_length)
-         self.max_position_embeddings = mp
-         cos, sin = self._compute(mp, device=torch.device("cpu"))
-         target = device or torch.device("cpu")
-         self.register_buffer("cos_cache", cos.to(target), persistent=False)
-         self.register_buffer("sin_cache", sin.to(target), persistent=False)
-
-     def _inv_freq(self, device=None):
-         device = device or self.device
-         freq = self.base ** (torch.arange(0, self.head_dim, 2, dtype=torch.float, device=device) / self.head_dim)
-         if self.scaling_factor > 1.0:
-             d_half = self.head_dim / 2
-             low = d_half * math.log(self.initial_context_length / (self.ntk_beta * 2 * math.pi)) / math.log(self.base)
-             high = d_half * math.log(self.initial_context_length / (self.ntk_alpha * 2 * math.pi)) / math.log(self.base)
-             interp = 1.0 / (self.scaling_factor * freq)
-             extrap = 1.0 / freq
-             ramp = (torch.arange(d_half, dtype=torch.float32, device=device) - low) / (high - low)
-             mask = 1 - ramp.clamp(0, 1)
-             return interp * (1 - mask) + extrap * mask
-         return 1.0 / freq
-
-     def _compute(self, n, device=None):
-         inv_freq = self._inv_freq(device)
-         t = torch.arange(n, dtype=torch.float32, device=device or self.device)
-         freqs = torch.einsum("i,j->ij", t, inv_freq)
-         c = 0.1 * math.log(self.scaling_factor) + 1.0 if self.scaling_factor > 1.0 else 1.0
-         return (freqs.cos() * c).to(self.dtype), (freqs.sin() * c).to(self.dtype)
-
-     def forward(self, q, k):
-         n = q.shape[0]
-         if n > self.cos_cache.shape[0]:
-             cos, sin = self._compute(n, torch.device("cpu"))
-             self.cos_cache, self.sin_cache = cos.to(q.device), sin.to(q.device)
-         cc = self.cos_cache.to(q.device) if self.cos_cache.device != q.device else self.cos_cache
-         sc = self.sin_cache.to(q.device) if self.sin_cache.device != q.device else self.sin_cache
-         cos, sin = cc[:n], sc[:n]
-         q = apply_rope(q.view(n, -1, self.head_dim), cos, sin).reshape(q.shape)
-         k = apply_rope(k.view(n, -1, self.head_dim), cos, sin).reshape(k.shape)
-         return q, k
-
-
- def sdpa(Q, K, V, S, sm_scale, ctx):
-     n, nh, qm, hd = Q.shape
-     w = 2 * ctx + 1
-     Kp = F.pad(K, (0, 0, 0, 0, ctx, ctx)); Vp = F.pad(V, (0, 0, 0, 0, ctx, ctx))
-     Kw = Kp.unfold(0, w, 1).permute(0, 3, 1, 2); Vw = Vp.unfold(0, w, 1).permute(0, 3, 1, 2)
-     idx = torch.arange(w, device=Q.device) - ctx
-     pos = torch.arange(n, device=Q.device)[:, None] + idx[None, :]
-     valid = (pos >= 0) & (pos < n)
-     scores = torch.einsum("nhqd,nwhd->nhqw", Q, Kw).float() * sm_scale
-     scores = scores.masked_fill(~valid[:, None, None, :], -float("inf"))
-     sink = (S * math.log(2.0)).reshape(nh, qm)[None, :, :, None].expand(n, -1, -1, 1)
-     scores = torch.cat([scores, sink], dim=-1)
-     wt = torch.softmax(scores, dim=-1)[..., :-1].to(V.dtype)
-     return torch.einsum("nhqw,nwhd->nhqd", wt, Vw).reshape(n, -1)
-
-
- class AttentionBlock(torch.nn.Module):
-     def __init__(self, cfg: ModelConfig, device=None):
-         super().__init__()
-         dt = torch.bfloat16
-         self.head_dim, self.nah, self.nkv = cfg.head_dim, cfg.num_attention_heads, cfg.num_key_value_heads
-         self.ctx = int(cfg.bidirectional_context_size)
-         self.sinks = torch.nn.Parameter(torch.empty(cfg.num_attention_heads, device=device, dtype=torch.float32))
-         self.norm = RMSNorm(cfg.hidden_size, device=device)
-         qkv_d = cfg.head_dim * (cfg.num_attention_heads + 2 * cfg.num_key_value_heads)
-         self.qkv = torch.nn.Linear(cfg.hidden_size, qkv_d, device=device, dtype=dt)
-         self.out = torch.nn.Linear(cfg.head_dim * cfg.num_attention_heads, cfg.hidden_size, device=device, dtype=dt)
-         self.qk_scale = 1 / math.sqrt(math.sqrt(cfg.head_dim))
-         self.rope = RotaryEmbedding(cfg.head_dim, int(cfg.rope_theta), torch.float32,
-                                     initial_context_length=cfg.initial_context_length,
-                                     scaling_factor=cfg.rope_scaling_factor,
-                                     ntk_alpha=cfg.rope_ntk_alpha, ntk_beta=cfg.rope_ntk_beta, device=device)
-
-     def forward(self, x):
-         t = self.norm(x).to(self.qkv.weight.dtype)
-         qkv = F.linear(t, self.qkv.weight, self.qkv.bias)
-         hd, nah, nkv = self.head_dim, self.nah, self.nkv
-         q = qkv[:, :nah * hd].contiguous()
-         k = qkv[:, nah * hd:(nah + nkv) * hd].contiguous()
-         v = qkv[:, (nah + nkv) * hd:(nah + 2 * nkv) * hd].contiguous()
-         q, k = self.rope(q, k)
-         q, k = q * self.qk_scale, k * self.qk_scale
-         n = q.shape[0]
-         q = q.view(n, nkv, nah // nkv, hd); k = k.view(n, nkv, hd); v = v.view(n, nkv, hd)
-         ao = sdpa(q, k, v, self.sinks, 1.0, self.ctx).to(self.out.weight.dtype)
-         return x + F.linear(ao, self.out.weight, self.out.bias).to(x.dtype)
-
-
- def swiglu(x, alpha=1.702, limit=7.0):
-     g, l = x.chunk(2, dim=-1)
-     g, l = g.clamp(max=limit), l.clamp(-limit, limit)
-     return g * torch.sigmoid(alpha * g) * (l + 1)
-
-
- class MLPBlock(torch.nn.Module):
-     def __init__(self, cfg: ModelConfig, device=None):
-         super().__init__()
-         dt = torch.bfloat16
-         self.ne, self.ept = cfg.num_experts, cfg.experts_per_token
-         self.norm = RMSNorm(cfg.hidden_size, device=device)
-         self.gate = torch.nn.Linear(cfg.hidden_size, cfg.num_experts, device=device, dtype=dt)
-         self.mlp1_weight = torch.nn.Parameter(torch.empty(cfg.num_experts, cfg.hidden_size, cfg.intermediate_size * 2, device=device, dtype=dt))
-         self.mlp1_bias = torch.nn.Parameter(torch.empty(cfg.num_experts, cfg.intermediate_size * 2, device=device, dtype=dt))
-         self.mlp2_weight = torch.nn.Parameter(torch.empty(cfg.num_experts, cfg.intermediate_size, cfg.hidden_size, device=device, dtype=dt))
-         self.mlp2_bias = torch.nn.Parameter(torch.empty(cfg.num_experts, cfg.hidden_size, device=device, dtype=dt))
-
-     def forward(self, x):
-         t = self.norm(x)
-         gs = F.linear(t.float(), self.gate.weight.float(), self.gate.bias.float())
-         top = torch.topk(gs, k=self.ept, dim=-1, sorted=True)
-         ew = torch.softmax(top.values, dim=-1) / self.ept
-         ei = top.indices
-         ept = self.ept
-
-         def _chunk(tc, eic, ewc):
-             o = expert_linear(tc.float().unsqueeze(1).expand(-1, eic.shape[1], -1),
-                               self.mlp1_weight[eic].float(), self.mlp1_bias[eic].float())
-             o = swiglu(o)
-             o = expert_linear(o.float(), self.mlp2_weight[eic].float(), self.mlp2_bias[eic].float())
-             return (torch.einsum("bec,be->bc", o.to(ewc.dtype), ewc) * ept).to(x.dtype)
-
-         cs = 32
-         if t.shape[0] > cs:
-             parts = [_chunk(t[s:s+cs], ei[s:s+cs], ew[s:s+cs]) for s in range(0, t.shape[0], cs)]
-             return x + torch.cat(parts, 0)
-         return x + _chunk(t, ei, ew)
-
-
- class TransformerBlock(torch.nn.Module):
-     def __init__(self, cfg, device=None):
-         super().__init__()
-         self.attn = AttentionBlock(cfg, device=device)
-         self.mlp = MLPBlock(cfg, device=device)
-     def forward(self, x):
-         return self.mlp(self.attn(x))
-
-
- class Checkpoint:
-     @staticmethod
-     def build_param_name_map(n):
-         return ({f"block.{i}.mlp.mlp1_bias": f"block.{i}.mlp.swiglu.bias" for i in range(n)}
-                 | {f"block.{i}.mlp.mlp1_weight": f"block.{i}.mlp.swiglu.weight" for i in range(n)}
-                 | {f"block.{i}.mlp.mlp2_bias": f"block.{i}.mlp.out.bias" for i in range(n)}
-                 | {f"block.{i}.mlp.mlp2_weight": f"block.{i}.mlp.out.weight" for i in range(n)})
-
-     def __init__(self, path, device, num_hidden_layers):
-         self.pnm = self.build_param_name_map(num_hidden_layers)
-         self.ds = device.type if device.index is None else f"{device.type}:{device.index}"
-         files = [os.path.join(path, f) for f in os.listdir(path) if f.endswith(".safetensors")]
-         self.map = {}
-         for sf in files:
-             with safe_open(sf, framework="pt", device=self.ds) as h:
-                 for k in h.keys():
-                     self.map[k] = sf
-
-     def get(self, name):
-         mapped = self.pnm.get(name, name)
-         with safe_open(self.map[mapped], framework="pt", device=self.ds) as h:
-             return h.get_tensor(mapped)
-
-
- class Transformer(torch.nn.Module):
-     def __init__(self, cfg, device):
-         super().__init__()
-         dt = torch.bfloat16
-         self.embedding = torch.nn.Embedding(cfg.vocab_size, cfg.hidden_size, device=device, dtype=dt)
-         self.block = torch.nn.ModuleList([TransformerBlock(cfg, device=device) for _ in range(cfg.num_hidden_layers)])
-         self.norm = RMSNorm(cfg.hidden_size, device=device)
-         self.unembedding = torch.nn.Linear(cfg.hidden_size, cfg.num_labels, bias=False, device=device, dtype=dt)
-
-     def forward(self, token_ids):
-         x = self.embedding(token_ids)
-         for blk in self.block:
-             x = blk(x)
-         return F.linear(self.norm(x), self.unembedding.weight, None)
-
-     @classmethod
-     def from_checkpoint(cls, checkpoint_dir, *, device):
-         torch.backends.cuda.matmul.allow_tf32 = False
-         torch.backends.cudnn.allow_tf32 = False
-         torch.set_float32_matmul_precision("highest")
-         cp = json.loads((Path(checkpoint_dir) / "config.json").read_text())
-         validate_model_config_contract(cp, context=str(checkpoint_dir))
-         cfg = ModelConfig.from_checkpoint_config(cp, context=str(checkpoint_dir))
-         ckpt = Checkpoint(checkpoint_dir, device, cfg.num_hidden_layers)
-         m = cls(cfg, device); m.eval()
-         for name, param in m.named_parameters():
-             loaded = ckpt.get(name)
-             if param.shape != loaded.shape:
-                 raise ValueError(f"Shape mismatch {name}: {param.shape} vs {loaded.shape}")
-             param.data.copy_(loaded)
-         return m
-
-
- # ── label info + span decoding ───────────────────────────────────
-
- @dataclass(frozen=True)
- class LabelInfo:
-     boundary_label_lookup: dict[str, dict[str, int]]
-     token_to_span_label: dict[int, int]
-     token_boundary_tags: dict[int, str | None]
-     span_class_names: tuple[str, ...]
-     span_label_lookup: dict[str, int]
-     background_token_label: int
-     background_span_label: int
-
-
- def labels_to_spans(labels_by_index, label_info):
-     spans, cur_label, start_idx, prev_idx = [], None, None, None
-     bg = label_info.background_span_label
-     for ti in sorted(labels_by_index):
-         lid = labels_by_index[ti]
-         sl = label_info.token_to_span_label.get(lid)
-         bt = label_info.token_boundary_tags.get(lid)
-         if prev_idx is not None and ti != prev_idx + 1:
-             if cur_label is not None and start_idx is not None:
-                 spans.append((cur_label, start_idx, prev_idx + 1))
-             cur_label = start_idx = None
-         if sl is None:
-             prev_idx = ti; continue
-         if sl == bg:
-             if cur_label is not None and start_idx is not None:
-                 spans.append((cur_label, start_idx, ti))
-             cur_label = start_idx = None; prev_idx = ti; continue
-         if bt == "S":
-             if cur_label is not None and start_idx is not None and prev_idx is not None:
-                 spans.append((cur_label, start_idx, prev_idx + 1))
-             spans.append((sl, ti, ti + 1)); cur_label = start_idx = None
-         elif bt == "B":
-             if cur_label is not None and start_idx is not None and prev_idx is not None:
-                 spans.append((cur_label, start_idx, prev_idx + 1))
-             cur_label, start_idx = sl, ti
-         elif bt == "I":
-             if cur_label is None or cur_label != sl:
-                 if cur_label is not None and start_idx is not None and prev_idx is not None:
-                     spans.append((cur_label, start_idx, prev_idx + 1))
-                 cur_label, start_idx = sl, ti
-         elif bt == "E":
-             if cur_label is None or cur_label != sl or start_idx is None:
-                 if cur_label is not None and start_idx is not None and prev_idx is not None:
-                     spans.append((cur_label, start_idx, prev_idx + 1))
-                 spans.append((sl, ti, ti + 1)); cur_label = start_idx = None
-             else:
-                 spans.append((cur_label, start_idx, ti + 1)); cur_label = start_idx = None
-         else:
-             if cur_label is not None and start_idx is not None and prev_idx is not None:
-                 spans.append((cur_label, start_idx, prev_idx + 1))
-             cur_label = start_idx = None
-         prev_idx = ti
-     if cur_label is not None and start_idx is not None and prev_idx is not None:
-         spans.append((cur_label, start_idx, prev_idx + 1))
-     return spans
-
-
- def token_spans_to_char_spans(spans, cs, ce):
-     out = []
-     for li, ts, te in spans:
-         if not (0 <= ts < te <= len(cs)):
-             continue
-         s, e = cs[ts], ce[te - 1]
-         if e > s:
-             out.append((li, s, e))
-     return out
-
-
- def trim_char_spans_whitespace(spans, text):
-     out = []
-     for li, s, e in spans:
-         if not (0 <= s < e <= len(text)):
-             continue
-         while s < e and text[s].isspace(): s += 1
-         while e > s and text[e - 1].isspace(): e -= 1
-         if e > s:
-             out.append((li, s, e))
-     return out
-
-
- # ── viterbi decoder ──────────────────────────────────────────────
-
  @functools.lru_cache(maxsize=1)
- def get_viterbi_transition_biases():
-     cp = MODEL_DIR / "viterbi_calibration.json"
-     default = {k: 0.0 for k in VITERBI_TRANSITION_BIAS_KEYS}
-     if not cp.is_file():
-         return default
-     payload = json.loads(cp.read_text())
-     raw = payload
-     ops = payload.get("operating_points")
-     if isinstance(ops, dict):
-         preset = ops.get(DEFAULT_VITERBI_CALIBRATION_PRESET)
-         if isinstance(preset, dict):
-             raw = preset.get("biases", raw)
-     if not isinstance(raw, dict):
-         return default
-     return {k: float(raw.get(k, 0.0)) for k in VITERBI_TRANSITION_BIAS_KEYS}
-
-
- class Decoder:
-     def __init__(self, label_info):
-         nc = len(label_info.token_to_span_label)
-         self._start = torch.full((nc,), -1e9, dtype=torch.float32)
-         self._end = torch.full((nc,), -1e9, dtype=torch.float32)
-         self._trans = torch.full((nc, nc), -1e9, dtype=torch.float32)
-         biases = get_viterbi_transition_biases()
-         bg_tok, bg_sp = label_info.background_token_label, label_info.background_span_label
-         ttsl, tbt = label_info.token_to_span_label, label_info.token_boundary_tags
-         for i in range(nc):
-             tag, sl = tbt.get(i), ttsl.get(i)
-             if tag in {"B", "S"} or i == bg_tok: self._start[i] = 0.0
-             if tag in {"E", "S"} or i == bg_tok: self._end[i] = 0.0
-             for j in range(nc):
-                 nt, ns = tbt.get(j), ttsl.get(j)
-                 if self._valid(tag, sl, nt, ns, bg_tok, bg_sp, j):
-                     self._trans[i, j] = self._bias(tag, sl, nt, ns, bg_sp, biases)
-
-     @staticmethod
-     def _valid(pt, ps, nt, ns, bti, bsi, ni):
-         nb = ns == bsi or ni == bti
-         if (ns is None or nt is None) and not nb: return False
-         if pt is None or ps is None: return nb or nt in {"B", "S"}
-         if ps == bsi or pt in {"E", "S"}: return nb or nt in {"B", "S"}
-         if pt in {"B", "I"}: return ps == ns and nt in {"I", "E"}
-         return False
-
-     @staticmethod
-     def _bias(pt, ps, nt, ns, bsi, b):
-         nb, pb = ns == bsi, ps == bsi
-         if pb: return b["transition_bias_background_stay"] if nb else b["transition_bias_background_to_start"]
-         if pt in {"B", "I"}: return b["transition_bias_inside_to_continue"] if nt == "I" else b["transition_bias_inside_to_end"]
-         return b["transition_bias_end_to_background"] if nb else b["transition_bias_end_to_start"]
-
-     def decode(self, lp):
-         # Runs on lp's device. When lp is on CUDA, the loop streams tiny
-         # kernels into the CUDA queue — on a warmed-up T4 this completes
-         # in a few seconds. v5's move to CPU looked cheap on paper but
-         # PyTorch CPU dispatch overhead made it far worse in practice.
-         sl, nc = lp.shape
-         if sl == 0: return []
-         st = self._start.to(lp.device, lp.dtype)
-         en = self._end.to(lp.device, lp.dtype)
-         tr = self._trans.to(lp.device, lp.dtype)
-         scores = lp[0] + st
-         bp = torch.empty((sl - 1, nc), device=lp.device, dtype=torch.int64)
-         for i in range(1, sl):
-             t = scores.unsqueeze(1) + tr
-             bs, bi = t.max(dim=0)
-             scores = bs + lp[i]; bp[i - 1] = bi
-         if not torch.isfinite(scores).any(): return lp.argmax(dim=1).tolist()
-         scores = scores + en
-         path = torch.empty(sl, device=lp.device, dtype=torch.int64)
-         path[-1] = scores.argmax()
-         for i in range(sl - 2, -1, -1): path[i] = bp[i, path[i + 1]]
-         return path.tolist()
-
-
- # ── runtime singleton ────────────────────────────────────────────
-
- @dataclass(frozen=True)
- class InferenceRuntime:
-     model: Transformer; encoding: tiktoken.Encoding; label_info: LabelInfo
-     device: torch.device; n_ctx: int
-
-
- @functools.lru_cache(maxsize=1)
- def get_runtime():
-     cp = MODEL_DIR
-     cfg = json.loads((cp / "config.json").read_text())
-     validate_model_config_contract(cfg, context=str(cp))
-     device = torch.device("cuda")
-     encoding = tiktoken.get_encoding(str(cfg["encoding"]).strip())
-     scn = [BACKGROUND_CLASS_LABEL]; sll = {BACKGROUND_CLASS_LABEL: 0}
-     bll, ttsl, tbt = {}, {}, {}
-     bg_idx = None
-     for idx, name in enumerate(NER_CLASS_NAMES):
-         if name == BACKGROUND_CLASS_LABEL:
-             bg_idx = idx; ttsl[idx] = 0; tbt[idx] = None; continue
-         bnd, base = name.split("-", 1)
-         si = sll.get(base)
-         if si is None:
-             si = len(scn); scn.append(base); sll[base] = si
-         ttsl[idx] = si; tbt[idx] = bnd
-         bll.setdefault(base, {})[bnd] = idx
-     li = LabelInfo(bll, ttsl, tbt, tuple(scn), sll, bg_idx, 0)
-     m = Transformer.from_checkpoint(str(cp), device=device)
-     return InferenceRuntime(m, encoding, li, device, int(cfg["default_n_ctx"]))


- @functools.lru_cache(maxsize=1)
- def get_decoder():
-     return Decoder(label_info=get_runtime().label_info)
-
-
- @torch.inference_mode()
- def predict_text(runtime, text, decoder):
-     tids = tuple(int(t) for t in runtime.encoding.encode(text, allowed_special="all"))
-     if not tids: return text, []
-     chunks = []
-     for s in range(0, len(tids), runtime.n_ctx):
-         e = min(s + runtime.n_ctx, len(tids))
-         wt = torch.tensor(tids[s:e], device=runtime.device, dtype=torch.int32)
-         lp = F.log_softmax(runtime.model(wt).float(), dim=-1)
-         chunks.append(lp)
-     # Single-chunk case dodges a copy; multi-chunk falls through to cat.
-     stacked = chunks[0] if len(chunks) == 1 else torch.cat(chunks, dim=0)
-     dl = decoder.decode(stacked)
-     if len(dl) != len(tids): dl = stacked.argmax(dim=1).tolist()
-     pli = {i: int(l) for i, l in enumerate(dl)}
-     pts = labels_to_spans(pli, runtime.label_info)
-     tb = [runtime.encoding.decode_single_token_bytes(t) for t in tids]
-     dt = b"".join(tb).decode("utf-8", errors="replace")
-     cbs, cbe = [], []
-     bc = 0
-     for ch in dt: cbs.append(bc); bc += len(ch.encode("utf-8")); cbe.append(bc)
-     cs, ce = [], []
-     tbc = 0
-     for rb in tb:
-         tbs = tbc; tbe = tbs + len(rb); tbc = tbe
-         cs.append(bisect_right(cbe, tbs)); ce.append(bisect_left(cbs, tbe))
-     pcs = token_spans_to_char_spans(pts, cs, ce)
-     pcs = trim_char_spans_whitespace(pcs, dt if dt != text else text)
-     src = dt if dt != text else text
-     detected = []
-     for li, s, e in pcs:
-         if 0 <= li < len(runtime.label_info.span_class_names):
-             lbl = runtime.label_info.span_class_names[li]
-         else:
-             lbl = f"label_{li}"
-         detected.append({"label": lbl, "start": s, "end": e, "text": src[s:e]})
-     return src, detected


  # =====================================================================
@@ -637,10 +143,7 @@ def detect_speakers(text, spans):
  @spaces.GPU
  def run_pii_analysis(text: str):
      """GPU-accelerated PII detection."""
-     runtime = get_runtime()
-     decoder = get_decoder()
-     source_text, detected = predict_text(runtime, text, decoder)
-     return source_text, detected


  def build_redacted_pdf_bytes(pdf_path: str, pii_texts: list[str]) -> bytes:
@@ -691,6 +194,12 @@ def build_redacted_pdf_bytes(pdf_path: str, pii_texts: list[str]) -> bytes:


  # ── Gradio Server ────────────────────────────────────────────────
  server = gr.Server()

@@ -699,82 +208,96 @@ async def homepage():
      return FRONTEND_HTML


- @server.post("/api/analyze")
- async def analyze_document(file: UploadFile = File(...)):
-     suffix = Path(file.filename).suffix.lower()
      if suffix not in (".pdf", ".doc", ".docx"):
-         return JSONResponse({"error": f"Unsupported: {suffix}. Use PDF, DOC, or DOCX."}, 400)
-     with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
-         tmp.write(await file.read()); tmp_path = tmp.name
      try:
-         text = extract_text(tmp_path)
          if not text.strip():
-             return JSONResponse({"error": "No text content found."}, 400)
          source_text, spans = run_pii_analysis(text)
          stats = compute_stats(source_text, spans)
          speakers = detect_speakers(source_text, spans)
-         return JSONResponse({
-             "filename": file.filename, "text": source_text, "spans": spans,
-             "stats": stats, "speakers": speakers,
-             "categories_meta": {k: {"color": v["color"], "cls": v["cls"],
-                                     "label": v["label"], "mono": v["mono"]}
-                                 for k, v in CATEGORIES_META.items()},
-         })
      except Exception as e:
-         return JSONResponse({"error": str(e)}, 500)
-     finally:
-         if os.path.exists(tmp_path): os.unlink(tmp_path)


- @server.post("/api/redact-pdf")
- async def redact_pdf_endpoint(
-     file: UploadFile = File(...),
-     spans: str = Form(...),
-     active: str = Form(...),
- ):
-     suffix = Path(file.filename).suffix.lower()
      if suffix != ".pdf":
-         return JSONResponse({"error": "PDF redaction only accepts PDF input."}, 400)
      try:
          span_list = json.loads(spans)
          active_set = set(json.loads(active))
      except Exception as e:
-         return JSONResponse({"error": f"Invalid payload: {e}"}, 400)

      pii_texts = [
          s.get("text", "") for s in span_list
          if s.get("label") in active_set
      ]
      if not pii_texts:
-         return JSONResponse({"error": "No active categories selected — nothing to redact."}, 400)

-     with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
-         tmp.write(await file.read()); tmp_path = tmp.name
      try:
          t0 = time.perf_counter()
-         pdf_bytes = build_redacted_pdf_bytes(tmp_path, pii_texts)
-         elapsed = time.perf_counter() - t0
-         out_name = (Path(file.filename).stem or "document") + ".redacted.pdf"
-         return StreamingResponse(
-             io.BytesIO(pdf_bytes),
-             media_type="application/pdf",
-             headers={
-                 "Content-Disposition": f'attachment; filename="{out_name}"',
-                 "X-Redaction-Ms": str(int(elapsed * 1000)),
-             },
-         )
      except Exception as e:
-         return JSONResponse({"error": str(e)}, 500)
-     finally:
-         if os.path.exists(tmp_path): os.unlink(tmp_path)


  @server.api(name="analyze_text")
- def analyze_text_api(text: str) -> str:
-     """Gradio API: analyze raw text for PII."""
      source_text, spans = run_pii_analysis(text)
      stats = compute_stats(source_text, spans)
-     return json.dumps({"text": source_text, "spans": spans, "stats": stats}, ensure_ascii=False)
@@ -783,7 +306,7 @@ FRONTEND_HTML = r"""<!DOCTYPE html>
  <head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width,initial-scale=1">
- <title>PII Explorer — Playground</title>
  <link rel="preconnect" href="https://fonts.googleapis.com">
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
  <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500&family=Source+Serif+4:opsz,wght@8..60,400;8..60,500;8..60,600&display=swap" rel="stylesheet">
@@ -1046,7 +569,7 @@ button{font:inherit;color:inherit;background:transparent;border:0;cursor:pointer
  <circle cx="8.5" cy="8.5" r="3.2" stroke="var(--block-background-fill)" stroke-width="1.4" fill="none"/>
  <line x1="11.2" y1="11.2" x2="14.2" y2="14.2" stroke="var(--block-background-fill)" stroke-width="1.4" stroke-linecap="round"/>
  </svg>
- <span class="u-brand-name">PII Explorer<span class="sub">/ Playground</span></span>
  </div>
  <h1 class="u-title">See what your documents are leaking.</h1>
  <p class="u-sub">Find every PII span in a PDF, DOC or DOCX — names, accounts, secrets and five other entity types — then export a fully redacted copy.</p>
@@ -1074,10 +597,10 @@ button{font:inherit;color:inherit;background:transparent;border:0;cursor:pointer
  </div>

  <div class="u-meta">
- <span><b>OpenAI Privacy Filter</b></span>
  <span>128k ctx</span>
  <span>apache 2.0</span>
- <span><b>gr.Server</b></span>
  </div>
  </div>
@@ -1110,7 +633,7 @@ button{font:inherit;color:inherit;background:transparent;border:0;cursor:pointer
  <!-- ============ results view ============ -->
  <div id="results-view">
  <div class="shell">
- <div class="pr-app" aria-label="PII Explorer Playground">

  <div class="pr-top">
  <div class="pr-logo">
@@ -1119,7 +642,7 @@ button{font:inherit;color:inherit;background:transparent;border:0;cursor:pointer
  <circle cx="8.5" cy="8.5" r="3.2" stroke="var(--block-background-fill)" stroke-width="1.4" fill="none"/>
  <line x1="11.2" y1="11.2" x2="14.2" y2="14.2" stroke="var(--block-background-fill)" stroke-width="1.4" stroke-linecap="round"/>
  </svg>
- <span class="pr-name">PII Explorer<span class="pr-name-sub">/ Playground</span></span>
  </div>
  <span class="pr-file-chip" id="file-chip"></span>
  <span class="pr-status" id="scan-status"><span class="pr-status-dot"></span>Scan complete</span>
@@ -1191,7 +714,19 @@ button{font:inherit;color:inherit;background:transparent;border:0;cursor:pointer

  <div class="tip" id="tip" style="display:none"></div>

- <script>
  const S = {
    text:'', spans:[], stats:{}, speakers:{}, catMeta:{}, filename:'', file:null,
    activeCats:new Set(), scanMs:0, sortedSpans:[],
@@ -1244,20 +779,22 @@ async function uploadFile(file){
    S.file = file;
    showLoading('scanning document…');
    document.getElementById('upload-view').style.display='none';
-   const form = new FormData(); form.append('file', file);
    const t0 = performance.now();
    try{
-     const r = await fetch('/api/analyze', {method:'POST', body:form});
-     const d = await r.json();
      if (d.error) { showError(d.error); return; }
      S.scanMs = performance.now() - t0;
      S.text = d.text; S.spans = d.spans; S.stats = d.stats;
      S.speakers = d.speakers||{}; S.catMeta = d.categories_meta||{};
-     S.filename = d.filename;
      S.activeCats = new Set(Object.keys(d.stats.categories));
      S.sortedSpans = [...S.spans].sort((a,b) => a.start - b.start);
      renderResults();
-   } catch(e){ showError('Analysis failed: '+e.message); }
    finally { hideLoading(); }
  }
@@ -1511,20 +1048,18 @@ document.getElementById('act-pdf').addEventListener('click', async () => {
    btn.disabled = true;
    showLoading('redacting PDF…');
    try {
-     const form = new FormData();
-     form.append('file', S.file);
-     form.append('spans', JSON.stringify(S.spans));
-     form.append('active', JSON.stringify([...S.activeCats]));
-     const r = await fetch('/api/redact-pdf', { method:'POST', body: form });
-     if (!r.ok) {
-       let err = `Redaction failed (${r.status})`;
-       try { const j = await r.json(); err = j.error || err; } catch {}
-       throw new Error(err);
-     }
-     const elapsedHeader = r.headers.get('X-Redaction-Ms');
-     const blob = await r.blob();
      download(baseName() + '.redacted.pdf', blob, 'application/pdf');
-     if (elapsedHeader) flash('act-pdf', `Downloaded (${(elapsedHeader/1000).toFixed(1)}s)`);
      else flash('act-pdf', 'Downloaded');
    } catch (e) {
      alert(e.message || 'Redaction failed');
  """
+ =======================================
+ PII Reveal - Document Privacy Explorer
+ =======================================
+
+ Uploads a PDF/DOC/DOCX, runs the openai/privacy-filter model over the
+ extracted text, and returns per-span character offsets + stats for an
+ interactive reader view. Also supports building a black-bar redacted PDF.
+
+ Inference path: `transformers.pipeline("token-classification",
+ "openai/privacy-filter", aggregation_strategy="simple")` — the pipeline
+ takes care of BIOES → char-level span aggregation for us.
+
+ PDF redaction (build_redacted_pdf_bytes) is optimized for large files:
+ per-page `needle in page_text` prefilter before page.search_for, skip
+ apply_redactions on pages with no matches, and save with garbage=1 to
+ avoid the expensive stream-recompression pass.
  """

  # ── stdlib ───────────────────────────────────────────────────────
  import functools
  import io
  import json
  import os
  import re
  import tempfile
  import time
  from pathlib import Path

  # ── third-party ──────────────────────────────────────────────────
  import gradio as gr
  import spaces
  import torch
+ from fastapi.responses import HTMLResponse
+ from gradio.data_classes import FileData

  # ── configuration ────────────────────────────────────────────────
+ PII_MODEL_REPO = os.getenv("MODEL_ID", "openai/privacy-filter")
  HF_TOKEN = os.getenv("HF_TOKEN", None)

  CATEGORIES_META = {
      "private_person": {"color": "#E24B4A", "cls": "hp", "label": "Person", "mono": False},
  }

  # =====================================================================
+ # MODEL INFERENCE (transformers pipeline — openai/privacy-filter)
  # =====================================================================

  @functools.lru_cache(maxsize=1)
+ def get_pii_pipeline():
+     """Lazy-load the privacy filter on the GPU. Cached so repeated calls
+     inside a single ZeroGPU slot don't re-move weights."""
+     from transformers import pipeline
+     return pipeline(
+         task="token-classification",
+         model=PII_MODEL_REPO,
+         aggregation_strategy="simple",  # merges BIOES tags into char-level spans
+         device=0,
+         torch_dtype=torch.bfloat16,
+         token=HF_TOKEN,
+     )


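For reference, a token-classification pipeline with aggregation_strategy="simple" yields one dict per merged entity. A hedged sketch of the expected shape; the label names and scores shown are assumptions based on CATEGORIES_META, not verified model output:

```python
# Illustrative only: typical output of a transformers
# token-classification pipeline with aggregation_strategy="simple".
pipe = get_pii_pipeline()
out = pipe("Contact Jane Doe at jane@example.com")
# out ≈ [
#   {"entity_group": "private_person", "score": 0.99,
#    "word": "Jane Doe", "start": 8, "end": 16},
#   {"entity_group": "private_email", "score": 0.98,
#    "word": "jane@example.com", "start": 20, "end": 36},
# ]
```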
+ def predict_text(text: str) -> tuple[str, list[dict]]:
+     """Returns (source_text, spans). `spans` is a list of
+     {label, start, end, text} with character offsets into `text`."""
+     if not text.strip():
+         return text, []
+     pipe = get_pii_pipeline()
+     results = pipe(text)
+     spans = []
+     for r in results:
+         label = r.get("entity_group") or r.get("entity")
+         if not label or label == "O":
+             continue
+         s, e = int(r["start"]), int(r["end"])
+         if e <= s or s < 0 or e > len(text):
+             continue
+         spans.append({"label": label, "start": s, "end": e, "text": text[s:e]})
+     return text, spans


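A quick local smoke test for the helpers above; a sketch that assumes a CUDA device is available, since get_pii_pipeline() pins the pipeline to device=0, and uses an illustrative sample string:

```python
# Hypothetical local smoke test for get_pii_pipeline / predict_text.
source, spans = predict_text("Invoice for John Smith, account 0532-0130-00")
for sp in spans:
    print(f'{sp["label"]:>16}  [{sp["start"]}:{sp["end"]}]  {sp["text"]}')
```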
  # =====================================================================

  @spaces.GPU
  def run_pii_analysis(text: str):
      """GPU-accelerated PII detection."""
+     return predict_text(text)


  def build_redacted_pdf_bytes(pdf_path: str, pii_texts: list[str]) -> bytes:


  # ── Gradio Server ────────────────────────────────────────────────
+ #
+ # We only keep one plain FastAPI route here — the homepage, which
+ # serves the static HTML shell. The heavy-lifting endpoints are
+ # declared with @server.api, which wraps them in Gradio's queue so
+ # they compose correctly with @spaces.GPU on ZeroGPU and with the
+ # gradio_client / @gradio/client SDKs.
  server = gr.Server()


      return FRONTEND_HTML


+ @server.api(name="analyze_document")
+ def analyze_document_api(file: FileData) -> dict:
+     """Extract text from an uploaded PDF/DOC/DOCX and run the OPF
+     privacy filter over it. Returns the detected spans, stats,
+     per-speaker counts, and the category color/label table.
+
+     Called from the browser via @gradio/client:
+         client.predict("/analyze_document", { file: handle_file(f) })
+     And from Python via gradio_client:
+         client.predict("/analyze_document", file=handle_file(path))
+     """
+     path = file.get("path") or ""
+     suffix = Path(path).suffix.lower()
+     orig_name = file.get("orig_name") or Path(path).name
      if suffix not in (".pdf", ".doc", ".docx"):
+         return {"error": f"Unsupported: {suffix}. Use PDF, DOC, or DOCX."}
+
      try:
+         text = extract_text(path)
          if not text.strip():
+             return {"error": "No text content found."}
          source_text, spans = run_pii_analysis(text)
          stats = compute_stats(source_text, spans)
          speakers = detect_speakers(source_text, spans)
+         return {
+             "filename": orig_name,
+             "text": source_text,
+             "spans": spans,
+             "stats": stats,
+             "speakers": speakers,
+             "categories_meta": {
+                 k: {"color": v["color"], "cls": v["cls"],
+                     "label": v["label"], "mono": v["mono"]}
+                 for k, v in CATEGORIES_META.items()
+             },
+         }
      except Exception as e:
+         return {"error": str(e)}
+

+ @server.api(name="redact_pdf")
+ def redact_pdf_api(file: FileData, spans: str, active: str) -> dict:
+     """Build a black-bar-redacted PDF from an uploaded PDF plus the
+     list of spans the browser wants redacted. `spans` and `active`
+     are JSON strings because the JS client serializes complex objects
+     more predictably as strings than as nested dicts.
+
+     Returns {"pdf": FileData, "elapsed_ms": int} so the caller can
+     download the file and also display timing."""
+     path = file.get("path") or ""
+     suffix = Path(path).suffix.lower()
      if suffix != ".pdf":
+         return {"error": "PDF redaction only accepts PDF input."}
      try:
          span_list = json.loads(spans)
          active_set = set(json.loads(active))
      except Exception as e:
+         return {"error": f"Invalid payload: {e}"}

      pii_texts = [
          s.get("text", "") for s in span_list
          if s.get("label") in active_set
      ]
      if not pii_texts:
+         return {"error": "No active categories selected — nothing to redact."}

      try:
          t0 = time.perf_counter()
+         pdf_bytes = build_redacted_pdf_bytes(path, pii_texts)
+         elapsed_ms = int((time.perf_counter() - t0) * 1000)
      except Exception as e:
+         return {"error": str(e)}
+
+     orig_name = file.get("orig_name") or Path(path).name
+     stem = Path(orig_name).stem or "document"
+     out_path = Path(tempfile.gettempdir()) / f"{stem}.redacted.pdf"
+     out_path.write_bytes(pdf_bytes)
+     return {
+         "pdf": FileData(path=str(out_path)),
+         "elapsed_ms": elapsed_ms,
+     }
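From Python, the same endpoints can be exercised with gradio_client, mirroring the calling convention the analyze_document docstring shows. A sketch under two stated assumptions: "user/pii-reveal" is a placeholder Space ID, and the dict shapes returned above arrive intact on the client:

```python
# Hypothetical client-side usage; "user/pii-reveal" is a placeholder.
import json
from gradio_client import Client, handle_file

client = Client("user/pii-reveal")
report = client.predict("/analyze_document", file=handle_file("contract.pdf"))
print(report["stats"], len(report["spans"]), "spans")

redacted = client.predict(
    "/redact_pdf",
    file=handle_file("contract.pdf"),
    spans=json.dumps(report["spans"]),
    active=json.dumps(["private_person", "private_email"]),
)
print("redacted PDF:", redacted["pdf"], f'({redacted["elapsed_ms"]} ms)')
```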


  @server.api(name="analyze_text")
+ def analyze_text_api(text: str) -> dict:
+     """Analyze raw text for PII — convenient for gradio_client users
+     who don't want to build a PDF just to test the model."""
      source_text, spans = run_pii_analysis(text)
      stats = compute_stats(source_text, spans)
+     return {"text": source_text, "spans": spans, "stats": stats}
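The text-only endpoint keeps quick experiments one call away; a sketch under the same placeholder Space ID assumption as above:

```python
# Same placeholder Space ID assumption as the sketch above.
from gradio_client import Client

client = Client("user/pii-reveal")
result = client.predict("/analyze_text", text="Call Ada Lovelace on +44 20 7946 0958")
for span in result["spans"]:
    print(span["label"], "->", span["text"])
```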


  # ── Frontend HTML (v6) ───────────────────────────────────────────
  <head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width,initial-scale=1">
+ <title>PII Reveal — Inspector</title>
  <link rel="preconnect" href="https://fonts.googleapis.com">
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
  <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500&family=Source+Serif+4:opsz,wght@8..60,400;8..60,500;8..60,600&display=swap" rel="stylesheet">

  <circle cx="8.5" cy="8.5" r="3.2" stroke="var(--block-background-fill)" stroke-width="1.4" fill="none"/>
  <line x1="11.2" y1="11.2" x2="14.2" y2="14.2" stroke="var(--block-background-fill)" stroke-width="1.4" stroke-linecap="round"/>
  </svg>
+ <span class="u-brand-name">PII Reveal<span class="sub">/ inspector</span></span>
  </div>
  <h1 class="u-title">See what your documents are leaking.</h1>
  <p class="u-sub">Find every PII span in a PDF, DOC or DOCX — names, accounts, secrets and five other entity types — then export a fully redacted copy.</p>

  </div>

  <div class="u-meta">
+ <span>openai privacy filter</span>
  <span>128k ctx</span>
+ <span>bfloat16</span>
  <span>apache 2.0</span>
  </div>
  </div>

  <!-- ============ results view ============ -->
  <div id="results-view">
  <div class="shell">
+ <div class="pr-app" aria-label="PII Reveal inspector">

  <div class="pr-top">
  <div class="pr-logo">

  <circle cx="8.5" cy="8.5" r="3.2" stroke="var(--block-background-fill)" stroke-width="1.4" fill="none"/>
  <line x1="11.2" y1="11.2" x2="14.2" y2="14.2" stroke="var(--block-background-fill)" stroke-width="1.4" stroke-linecap="round"/>
  </svg>
+ <span class="pr-name">PII Reveal<span class="pr-name-sub">/ inspector</span></span>
  </div>
  <span class="pr-file-chip" id="file-chip"></span>
  <span class="pr-status" id="scan-status"><span class="pr-status-dot"></span>Scan complete</span>

  <div class="tip" id="tip" style="display:none"></div>

+ <script type="module">
+ // ══════════════════════════════════════════════════════════════════
+ // Gradio JS client — /api/analyze and /api/redact-pdf were plain
+ // FastAPI routes in the old version, which meant requests bypassed
+ // Gradio's queue entirely. Now the backend exposes @server.api
+ // routes and we call them through the Client, which gives us queue
+ // serialization, progress events, and correct ZeroGPU allocation
+ // via @spaces.GPU.
+ // ══════════════════════════════════════════════════════════════════
+ import { Client, handle_file } from "https://cdn.jsdelivr.net/npm/@gradio/client/dist/index.min.js";
+
+ const clientPromise = Client.connect(window.location.origin);
+
  const S = {
    text:'', spans:[], stats:{}, speakers:{}, catMeta:{}, filename:'', file:null,
    activeCats:new Set(), scanMs:0, sortedSpans:[],

    S.file = file;
    showLoading('scanning document…');
    document.getElementById('upload-view').style.display='none';
    const t0 = performance.now();
    try{
+     const client = await clientPromise;
+     const result = await client.predict("/analyze_document", {
+       file: handle_file(file),
+     });
+     const d = result.data[0] || {};
      if (d.error) { showError(d.error); return; }
      S.scanMs = performance.now() - t0;
      S.text = d.text; S.spans = d.spans; S.stats = d.stats;
      S.speakers = d.speakers||{}; S.catMeta = d.categories_meta||{};
+     S.filename = d.filename || file.name;
      S.activeCats = new Set(Object.keys(d.stats.categories));
      S.sortedSpans = [...S.spans].sort((a,b) => a.start - b.start);
      renderResults();
+   } catch(e){ showError('Analysis failed: '+(e && e.message ? e.message : e)); }
    finally { hideLoading(); }
  }

    btn.disabled = true;
    showLoading('redacting PDF…');
    try {
+     const client = await clientPromise;
+     const result = await client.predict("/redact_pdf", {
+       file: handle_file(S.file),
+       spans: JSON.stringify(S.spans),
+       active: JSON.stringify([...S.activeCats]),
+     });
+     const d = result.data[0] || {};
+     if (d.error) throw new Error(d.error);
+     if (!d.pdf || !d.pdf.url) throw new Error('No PDF returned.');
+     const blob = await (await fetch(d.pdf.url)).blob();
      download(baseName() + '.redacted.pdf', blob, 'application/pdf');
+     if (typeof d.elapsed_ms === 'number') flash('act-pdf', `Downloaded (${(d.elapsed_ms/1000).toFixed(1)}s)`);
      else flash('act-pdf', 'Downloaded');
    } catch (e) {
      alert(e.message || 'Redaction failed');