ysharma HF Staff committed on
Commit
f78f0c3
·
verified ·
1 Parent(s): f949461

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -77
app.py CHANGED
@@ -1,7 +1,30 @@
1
  """
 
2
  ================================================
3
- SmartRedact Paste — "Pastebin with a conscience"
4
- ================================================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  """
6
 
7
  from __future__ import annotations
@@ -16,7 +39,6 @@ from dataclasses import dataclass
16
  from typing import Optional
17
 
18
  import gradio as gr
19
- from fastapi import Request
20
  from fastapi.responses import HTMLResponse, JSONResponse
21
 
22
  # spaces is only available on Hugging Face Spaces; degrade gracefully
@@ -161,37 +183,37 @@ async def home():
161
  return HTMLResponse(_COMPOSE_HTML)
162
 
163
 
164
- @server.post("/api/paste")
165
- async def create_paste(req: Request):
166
- try:
167
- body = await req.json()
168
- except Exception:
169
- return JSONResponse({"error": "Expected JSON body"}, status_code=400)
170
 
171
- text = (body.get("text") or "").strip()
172
- ttl_key = body.get("ttl", "never")
 
 
 
 
 
 
173
  if not text:
174
- return JSONResponse({"error": "Paste is empty"}, status_code=400)
175
  if len(text) > MAX_PASTE_CHARS:
176
- return JSONResponse(
177
- {"error": f"Paste exceeds {MAX_PASTE_CHARS:,} characters"},
178
- status_code=413,
179
- )
180
- if ttl_key not in TTL_CHOICES:
181
- return JSONResponse({"error": f"Unknown ttl {ttl_key!r}"}, status_code=400)
182
 
183
  try:
184
  source_text, spans = analyze(text)
185
- except Exception as exc: # model failure is the only realistic path here
186
- return JSONResponse({"error": f"OPF inference failed: {exc}"},
187
- status_code=500)
188
 
189
  redacted = redact(source_text, spans)
190
  stats = compute_stats(source_text, spans)
191
 
192
  pid = secrets.token_urlsafe(6)
193
  reveal_token = secrets.token_urlsafe(22)
194
- ttl_sec = TTL_CHOICES[ttl_key]
195
  now = time.time()
196
  expires_at = (now + ttl_sec) if ttl_sec is not None else None
197
 
@@ -202,15 +224,16 @@ async def create_paste(req: Request):
202
  created_at=now, expires_at=expires_at,
203
  ))
204
 
205
- return JSONResponse({
206
  "id": pid,
207
  "reveal_token": reveal_token,
208
  "view_path": f"/view/{pid}",
209
  "reveal_path": f"/view/{pid}?token={reveal_token}",
210
  "expires_at": expires_at,
211
  "stats": stats,
 
212
  "categories_meta": CATEGORIES_META,
213
- })
214
 
215
 
216
  @server.get("/view/{pid}", response_class=HTMLResponse)
@@ -232,6 +255,7 @@ async def view_paste(pid: str, token: Optional[str] = None):
232
 
233
  @server.get("/api/paste/{pid}")
234
  async def api_get_paste(pid: str, token: Optional[str] = None):
 
235
  p = _store_get(pid)
236
  if p is None:
237
  return JSONResponse({"error": "not found or expired"}, status_code=404)
@@ -251,39 +275,6 @@ async def api_get_paste(pid: str, token: Optional[str] = None):
251
  return JSONResponse(payload)
252
 
253
 
254
- @server.api(name="analyze_paste")
255
- def analyze_paste_api(text: str, ttl: str = "never") -> str:
256
- """Programmatic endpoint for gradio-client SDK.
257
-
258
- Creates a paste and returns the paste id, reveal token, and stats as
259
- a JSON string. Callers must combine the paths with the Space's base
260
- URL to form shareable links."""
261
- if ttl not in TTL_CHOICES:
262
- return json.dumps({"error": f"Unknown ttl {ttl!r}"})
263
- source_text, spans = analyze(text)
264
- redacted = redact(source_text, spans)
265
- stats = compute_stats(source_text, spans)
266
- pid = secrets.token_urlsafe(6)
267
- reveal_token = secrets.token_urlsafe(22)
268
- ttl_sec = TTL_CHOICES[ttl]
269
- now = time.time()
270
- expires_at = (now + ttl_sec) if ttl_sec is not None else None
271
- _store_put(Paste(
272
- id=pid, reveal_token=reveal_token,
273
- original=source_text, redacted=redacted,
274
- spans=spans, stats=stats,
275
- created_at=now, expires_at=expires_at,
276
- ))
277
- return json.dumps({
278
- "id": pid,
279
- "reveal_token": reveal_token,
280
- "view_path": f"/view/{pid}",
281
- "reveal_path": f"/view/{pid}?token={reveal_token}",
282
- "expires_at": expires_at,
283
- "stats": stats,
284
- })
285
-
286
-
287
  # ── HTML rendering ────────────────────────────────────────────────
288
 
289
  def _escape(text: str) -> str:
@@ -505,7 +496,7 @@ _COMPOSE_HTML = r"""<!DOCTYPE html>
505
  <head>
506
  <meta charset="UTF-8">
507
  <meta name="viewport" content="width=device-width,initial-scale=1">
508
- <title>SmartRedact Paste β€” Pastebin with a conscience</title>
509
  <link rel="preconnect" href="https://fonts.googleapis.com">
510
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
511
  <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500&family=Instrument+Serif:ital@0;1&display=swap" rel="stylesheet">
@@ -612,7 +603,7 @@ _COMPOSE_HTML = r"""<!DOCTYPE html>
612
 
613
  .pp-footer{
614
  margin-top:28px;padding-top:22px;border-top:0.5px solid var(--line);
615
- display:flex;justify-content:space-between;gap:16px;color:var(--ink-faint);font-size:14px;flex-wrap:wrap;
616
  }
617
  .pp-footer a{color:var(--ink-dim)}
618
 
@@ -642,7 +633,7 @@ _COMPOSE_HTML = r"""<!DOCTYPE html>
642
 
643
  <div class="pp-brand">
644
  <div class="pp-brand-mark">P</div>
645
- <div class="pp-brand-name">SmartRedact Paste<span class="sub">pastebin with a conscience</span></div>
646
  </div>
647
 
648
  <div class="pp-hero">
@@ -727,11 +718,21 @@ _COMPOSE_HTML = r"""<!DOCTYPE html>
727
  </section>
728
 
729
  <footer class="pp-footer">
730
- <div>Powered by <b>gr.Server</b> Β· <a href="https://huggingface.co/openai/privacy-filter" target="_blank" rel="noopener"><b>OpenAI Privacy Filter</b></a> Β· 1.5B params, 50M active, 128k context</div>
 
731
  </footer>
732
  </div>
733
 
734
- <script>
 
 
 
 
 
 
 
 
 
735
  const CATS = """ + _CATEGORIES_JSON + r""";
736
  const MAX = """ + str(MAX_PASTE_CHARS) + r""";
737
 
@@ -795,21 +796,18 @@ async function createPaste(){
795
 
796
  $btn.disabled = true; $load.classList.add('on'); $ok.classList.remove('on');
797
  try{
798
- const r = await fetch('/api/paste', {
799
- method: 'POST',
800
- headers: {'Content-Type': 'application/json'},
801
- body: JSON.stringify({text, ttl}),
802
- });
803
- const data = await r.json();
804
- if (!r.ok) throw new Error(data.error || ('HTTP ' + r.status));
805
 
806
  const origin = window.location.origin;
807
  document.getElementById('pp-view-url').value = origin + data.view_path;
808
  document.getElementById('pp-reveal-url').value = origin + data.reveal_path;
809
 
810
- // Fetch public redacted version to preview
811
- const pv = await fetch('/api/paste/' + data.id).then(x => x.json());
812
- document.getElementById('pp-preview-redacted').innerHTML = renderRedacted(pv.redacted);
813
 
814
  const s = data.stats;
815
  const cats = Object.entries(s.categories).sort((a,b) => b[1].count - a[1].count);
@@ -860,7 +858,7 @@ _VIEW_HTML = r"""<!DOCTYPE html>
860
  <head>
861
  <meta charset="UTF-8">
862
  <meta name="viewport" content="width=device-width,initial-scale=1">
863
- <title>Paste __PID__ β€” SmartRedact Paste</title>
864
  <link rel="preconnect" href="https://fonts.googleapis.com">
865
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
866
  <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500&family=Instrument+Serif:ital@0;1&display=swap" rel="stylesheet">
@@ -934,7 +932,7 @@ _VIEW_HTML = r"""<!DOCTYPE html>
934
  <div class="pp-brand">
935
  <a href="/" style="text-decoration:none;display:flex;align-items:center;gap:10px">
936
  <div class="pp-brand-mark">P</div>
937
- <div class="pp-brand-name">SmartRedact Paste<span class="sub">pastebin with a conscience</span></div>
938
  </a>
939
  </div>
940
 
@@ -977,7 +975,7 @@ _NOT_FOUND_HTML = r"""<!DOCTYPE html>
977
  <head>
978
  <meta charset="UTF-8">
979
  <meta name="viewport" content="width=device-width,initial-scale=1">
980
- <title>Paste not found β€” SmartRedact Paste</title>
981
  <link rel="preconnect" href="https://fonts.googleapis.com">
982
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
983
  <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500&family=Instrument+Serif:ital@0;1&display=swap" rel="stylesheet">
@@ -998,7 +996,7 @@ _NOT_FOUND_HTML = r"""<!DOCTYPE html>
998
  <div class="pp-brand">
999
  <a href="/" style="text-decoration:none;display:flex;align-items:center;gap:10px">
1000
  <div class="pp-brand-mark">P</div>
1001
- <div class="pp-brand-name">SmartRedact Paste<span class="sub">pastebin with a conscience</span></div>
1002
  </a>
1003
  </div>
1004
  <div class="pp-404">
@@ -1015,4 +1013,4 @@ _NOT_FOUND_HTML = r"""<!DOCTYPE html>
1015
  # ── launch ────────────────────────────────────────────────────────
1016
 
1017
  if __name__ == "__main__":
1018
- server.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
 
1
  """
2
+ DLP Paste-Proxy — "Pastebin with a conscience"
3
  ================================================
4
+
5
+ A sleek paste-to-share service. The author pastes PII-rich text and gets
6
+ a shareable URL. Recipients at that URL see the OPF-redacted version by
7
+ default; a separate "reveal" link guarded by an unguessable token shows
8
+ the original.
9
+
10
+ Why gr.Server? We need four HTTP surfaces that don't map cleanly onto
11
+ gr.Blocks event wiring:
12
+ * @server.api create_paste - accept paste, run OPF, mint IDs
13
+ (queued compute → @gradio/client)
14
+ * GET /view/{id} - public redacted view page
15
+ * GET /view/{id}?token=... - author's reveal page
16
+ * GET /api/paste/{pid} - JSON lookup of an existing paste
17
+ plus a background sweeper for auto-expiry.
18
+
19
+ The create_paste endpoint is the only one that runs the OPF model,
20
+ so it is the only one that needs Gradio's queue + ZeroGPU wiring —
21
+ the other three are pure lookup/rendering and are served as plain
22
+ FastAPI routes, which the gradio.Server blog recommends for static
23
+ content.
24
+
25
+ Storage is an in-process dict. That is fine for a public demo — the
26
+ point is to illustrate the request-composition model; it is NOT a
27
+ durable pastebin. Restarting the Space clears all pastes.
28
  """
29
 
30
  from __future__ import annotations
 
39
  from typing import Optional
40
 
41
  import gradio as gr
 
42
  from fastapi.responses import HTMLResponse, JSONResponse
43
 
44
  # spaces is only available on Hugging Face Spaces; degrade gracefully
 
183
  return HTMLResponse(_COMPOSE_HTML)
184
 
185
 
186
+ @server.api(name="create_paste")
187
+ def create_paste_api(text: str, ttl: str = "never") -> dict:
188
+ """Scan text with the OPF model, mint a paste id + reveal token,
189
+ and return all the metadata the caller needs to build share URLs.
 
 
190
 
191
+ This is a @server.api route (not a plain FastAPI POST) because it
192
+ runs the @spaces.GPU-decorated `analyze` β€” we want the request to
193
+ go through Gradio's queue so concurrent callers don't stomp on
194
+ each other's ZeroGPU allocations. Both the browser (via
195
+ @gradio/client) and Python clients (via gradio_client) hit the
196
+ same endpoint.
197
+ """
198
+ text = (text or "").strip()
199
  if not text:
200
+ return {"error": "Paste is empty"}
201
  if len(text) > MAX_PASTE_CHARS:
202
+ return {"error": f"Paste exceeds {MAX_PASTE_CHARS:,} characters"}
203
+ if ttl not in TTL_CHOICES:
204
+ return {"error": f"Unknown ttl {ttl!r}"}
 
 
 
205
 
206
  try:
207
  source_text, spans = analyze(text)
208
+ except Exception as exc:
209
+ return {"error": f"OPF inference failed: {exc}"}
 
210
 
211
  redacted = redact(source_text, spans)
212
  stats = compute_stats(source_text, spans)
213
 
214
  pid = secrets.token_urlsafe(6)
215
  reveal_token = secrets.token_urlsafe(22)
216
+ ttl_sec = TTL_CHOICES[ttl]
217
  now = time.time()
218
  expires_at = (now + ttl_sec) if ttl_sec is not None else None
219
 
 
224
  created_at=now, expires_at=expires_at,
225
  ))
226
 
227
+ return {
228
  "id": pid,
229
  "reveal_token": reveal_token,
230
  "view_path": f"/view/{pid}",
231
  "reveal_path": f"/view/{pid}?token={reveal_token}",
232
  "expires_at": expires_at,
233
  "stats": stats,
234
+ "redacted": redacted, # let the frontend render a preview without a second round-trip
235
  "categories_meta": CATEGORIES_META,
236
+ }
237
 
238
 
239
  @server.get("/view/{pid}", response_class=HTMLResponse)
 
255
 
256
  @server.get("/api/paste/{pid}")
257
  async def api_get_paste(pid: str, token: Optional[str] = None):
258
+ """Plain FastAPI read-only lookup β€” no compute, no queue needed."""
259
  p = _store_get(pid)
260
  if p is None:
261
  return JSONResponse({"error": "not found or expired"}, status_code=404)
 
275
  return JSONResponse(payload)
276
 
277
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
278
  # ── HTML rendering ────────────────────────────────────────────────
279
 
280
  def _escape(text: str) -> str:
 
496
  <head>
497
  <meta charset="UTF-8">
498
  <meta name="viewport" content="width=device-width,initial-scale=1">
499
+ <title>DLP Paste-Proxy β€” Pastebin with a conscience</title>
500
  <link rel="preconnect" href="https://fonts.googleapis.com">
501
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
502
  <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500&family=Instrument+Serif:ital@0;1&display=swap" rel="stylesheet">
 
603
 
604
  .pp-footer{
605
  margin-top:28px;padding-top:22px;border-top:0.5px solid var(--line);
606
+ display:flex;justify-content:space-between;gap:16px;color:var(--ink-faint);font-size:12px;flex-wrap:wrap;
607
  }
608
  .pp-footer a{color:var(--ink-dim)}
609
 
 
633
 
634
  <div class="pp-brand">
635
  <div class="pp-brand-mark">P</div>
636
+ <div class="pp-brand-name">DLP Paste-Proxy<span class="sub">pastebin with a conscience</span></div>
637
  </div>
638
 
639
  <div class="pp-hero">
 
718
  </section>
719
 
720
  <footer class="pp-footer">
721
+ <div>Powered by <a href="https://huggingface.co/openai/privacy-filter" target="_blank" rel="noopener">OpenAI Privacy Filter</a> Β· 1.5B params, 50M active, 128k context</div>
722
+ <div><a href="#" id="pp-about">How this works β†’</a></div>
723
  </footer>
724
  </div>
725
 
726
+ <script type="module">
727
+ // ══════════════════════════════════════════════════════════════════
728
+ // Gradio JS client β€” hits the queued @server.api create_paste route
729
+ // so the OPF model call is serialized through Gradio's queue and
730
+ // plays nicely with @spaces.GPU on ZeroGPU.
731
+ // ══════════════════════════════════════════════════════════════════
732
+ import { Client } from "https://cdn.jsdelivr.net/npm/@gradio/client/dist/index.min.js";
733
+
734
+ const clientPromise = Client.connect(window.location.origin);
735
+
736
  const CATS = """ + _CATEGORIES_JSON + r""";
737
  const MAX = """ + str(MAX_PASTE_CHARS) + r""";
738
 
 
796
 
797
  $btn.disabled = true; $load.classList.add('on'); $ok.classList.remove('on');
798
  try{
799
+ const client = await clientPromise;
800
+ const result = await client.predict("/create_paste", { text, ttl });
801
+ const data = result.data[0] || {};
802
+ if (data.error) throw new Error(data.error);
 
 
 
803
 
804
  const origin = window.location.origin;
805
  document.getElementById('pp-view-url').value = origin + data.view_path;
806
  document.getElementById('pp-reveal-url').value = origin + data.reveal_path;
807
 
808
+ // create_paste already returns the redacted preview in-line, so
809
+ // no second round-trip to /api/paste/{id} is needed here.
810
+ document.getElementById('pp-preview-redacted').innerHTML = renderRedacted(data.redacted || '');
811
 
812
  const s = data.stats;
813
  const cats = Object.entries(s.categories).sort((a,b) => b[1].count - a[1].count);
 
858
  <head>
859
  <meta charset="UTF-8">
860
  <meta name="viewport" content="width=device-width,initial-scale=1">
861
+ <title>Paste __PID__ β€” DLP Paste-Proxy</title>
862
  <link rel="preconnect" href="https://fonts.googleapis.com">
863
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
864
  <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500&family=Instrument+Serif:ital@0;1&display=swap" rel="stylesheet">
 
932
  <div class="pp-brand">
933
  <a href="/" style="text-decoration:none;display:flex;align-items:center;gap:10px">
934
  <div class="pp-brand-mark">P</div>
935
+ <div class="pp-brand-name">DLP Paste-Proxy<span class="sub">pastebin with a conscience</span></div>
936
  </a>
937
  </div>
938
 
 
975
  <head>
976
  <meta charset="UTF-8">
977
  <meta name="viewport" content="width=device-width,initial-scale=1">
978
+ <title>Paste not found β€” DLP Paste-Proxy</title>
979
  <link rel="preconnect" href="https://fonts.googleapis.com">
980
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
981
  <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500&family=Instrument+Serif:ital@0;1&display=swap" rel="stylesheet">
 
996
  <div class="pp-brand">
997
  <a href="/" style="text-decoration:none;display:flex;align-items:center;gap:10px">
998
  <div class="pp-brand-mark">P</div>
999
+ <div class="pp-brand-name">DLP Paste-Proxy<span class="sub">pastebin with a conscience</span></div>
1000
  </a>
1001
  </div>
1002
  <div class="pp-404">
 
1013
  # ── launch ────────────────────────────────────────────────────────
1014
 
1015
  if __name__ == "__main__":
1016
+ server.launch(server_name="0.0.0.0", server_port=7860)