Spaces:
Running
Running
#!/usr/bin/env python3
"""
modeldna — HuggingFace Space
Interactive model provenance scanner.
Replaces the stale RadicalNotionAI/modelatlas-dashboard Space.
Deployed at: https://huggingface.co/spaces/RadicalNotionAI/modeldna
Custom domain: modeldna.ai (via HF Space custom domain setting)
"""
import json
import logging
import sys
import time
from pathlib import Path

import gradio as gr

# scan.py is in the same directory as app.py in both local hf_space/ and on HF
sys.path.insert(0, str(Path(__file__).parent))
from scan import scan, KNOWN_BASES
# ── Discovery: find derivatives that may not attribute properly ────────────
def find_unattributed_derivatives(base_match: str, scanned_id: str) -> list[dict]:
    """Look up stored scans that share a base model but declare no attribution.

    Queries the ``modeldna_scans`` table for at most five rows whose
    ``confirmed_base`` equals ``base_match``, whose ``model_id`` differs from
    ``scanned_id``, and whose ``has_attribution`` is false or NULL, ordered by
    ``downloads`` descending with NULLs last.

    The lookup is best-effort: if ``psycopg2`` cannot be imported, the
    database cannot be reached, or the query fails, the failure is logged at
    debug level and an empty list is returned so the caller keeps working.

    Args:
        base_match: Confirmed base model to match against.
        scanned_id: Model ID of the scan in progress; excluded from results.

    Returns:
        A list of dicts with keys ``model_id``, ``confirmed_base`` and
        ``downloads``. ``downloads`` is passed through from the row unchanged
        and may therefore be ``None``.
    """
    if not base_match:
        # An empty base identifies no model family, so there is nothing to
        # look up; skip the database round trip.
        return []

    try:
        # Imported lazily so the app still starts where psycopg2 is absent.
        import psycopg2

        conn = psycopg2.connect(
            "postgresql:///modelatlas?host=/var/run/postgresql&port=5433&user=tim"
        )
        # Explicit try/finally: the cursor and connection must be released
        # even when execute()/fetchall() raises.
        try:
            cur = conn.cursor()
            try:
                # Find models in the scan results that match this base but
                # lack attribution (placeholder query — will be populated as
                # scans accumulate).
                cur.execute("""
                    SELECT model_id, confirmed_base, has_attribution, downloads
                    FROM modeldna_scans
                    WHERE confirmed_base = %s
                      AND model_id != %s
                      AND (has_attribution = false OR has_attribution IS NULL)
                    ORDER BY downloads DESC NULLS LAST
                    LIMIT 5
                """, (base_match, scanned_id))
                rows = cur.fetchall()
            finally:
                cur.close()
        finally:
            conn.close()
    except Exception:
        # Deliberate best-effort boundary: discovery is optional, but keep a
        # trace instead of swallowing the failure silently.
        logging.getLogger(__name__).debug(
            "derivative lookup failed for base %r", base_match, exc_info=True
        )
        return []

    return [
        {"model_id": row[0], "confirmed_base": row[1], "downloads": row[3]}
        for row in rows
    ]
def store_scan_result(result: dict) -> None:
    """Persist a scan result so later scans can discover related models.

    Creates the ``modeldna_scans`` table if it does not exist, then upserts
    one row keyed on ``model_id`` with the confirmed base, confidence,
    whether the evidence carries a ``claimed_base`` (stored as
    ``has_attribution``), the flag count and the download count.

    Storage is best-effort: any failure (missing ``psycopg2``, unreachable
    database, malformed ``result``) is logged at debug level and otherwise
    ignored, so a storage problem never breaks the scan itself.

    Args:
        result: Scan result dictionary. Reads ``model_id``, ``error``,
            ``verdict``, ``metadata`` and ``evidence``.
    """
    try:
        model_id = result.get("model_id")
        # A failed scan has nothing worth keeping, and upserting it would
        # overwrite the row of an earlier successful scan with empty values.
        if "error" in result or not model_id:
            return

        verdict = result.get("verdict") or {}
        metadata = result.get("metadata") or {}
        evidence = result.get("evidence") or {}

        flag_count = verdict.get("flag_count")
        if flag_count is None:
            # NOTE(review): falls back to counting verdict["flags"], the list
            # format_verdict renders — confirm scan() does not already supply
            # flag_count itself.
            flag_count = len(verdict.get("flags") or [])

        row = (
            model_id,
            verdict.get("base_model_confirmed"),
            verdict.get("confidence"),
            bool(evidence.get("claimed_base")),
            flag_count,
            metadata.get("downloads", 0),
        )

        # Imported lazily so the app still starts where psycopg2 is absent.
        import psycopg2

        conn = psycopg2.connect(
            "postgresql:///modelatlas?host=/var/run/postgresql&port=5433&user=tim"
        )
        # Explicit try/finally: release the cursor and connection even when a
        # statement raises.
        try:
            cur = conn.cursor()
            try:
                cur.execute("""
                    CREATE TABLE IF NOT EXISTS modeldna_scans (
                        id SERIAL PRIMARY KEY,
                        model_id TEXT UNIQUE,
                        confirmed_base TEXT,
                        confidence TEXT,
                        has_attribution BOOLEAN,
                        flag_count INT,
                        downloads INT,
                        scanned_at TIMESTAMPTZ DEFAULT now()
                    )
                """)
                # confirmed_base is refreshed on conflict too; otherwise a
                # re-scan would leave a stale base in place, and the
                # derivative lookup filters on that column.
                cur.execute("""
                    INSERT INTO modeldna_scans
                        (model_id, confirmed_base, confidence, has_attribution, flag_count, downloads)
                    VALUES (%s, %s, %s, %s, %s, %s)
                    ON CONFLICT (model_id) DO UPDATE
                    SET confirmed_base=EXCLUDED.confirmed_base,
                        confidence=EXCLUDED.confidence,
                        has_attribution=EXCLUDED.has_attribution,
                        flag_count=EXCLUDED.flag_count,
                        downloads=EXCLUDED.downloads,
                        scanned_at=now()
                """, row)
                conn.commit()
            finally:
                cur.close()
        finally:
            conn.close()
    except Exception:
        # graceful — don't break the scan if storage fails, but leave a trace
        logging.getLogger(__name__).debug("storing scan result failed", exc_info=True)
def format_verdict(result: dict) -> tuple[str, str, str]:
    """Format a scan result into the three Markdown sections shown in the UI.

    Args:
        result: Scan result dictionary. Reads ``error``, ``verdict``,
            ``evidence``, ``metadata`` and ``elapsed_s``.

    Returns:
        A ``(header, details, flags)`` tuple of Markdown strings. When
        ``result`` contains an ``error`` key, the header is a failure banner,
        the details carry the error text and the flags section is empty.
    """
    # NOTE(review): the emoji in this function were reconstructed from
    # mis-encoded text in the source — confirm the exact glyphs.
    if "error" in result:
        return ("❌ Scan Failed", f"**Error**: {result['error']}", "")

    # ``or`` fallbacks (not just .get defaults) so that keys present with a
    # None value behave like missing keys instead of crashing or printing
    # "None".
    verdict = result.get("verdict") or {}
    evidence = result.get("evidence") or {}
    metadata = result.get("metadata") or {}
    flags = verdict.get("flags") or []

    # Header
    badge = {"HIGH": "✅", "MODERATE": "⚠️", "NONE": "❌"}.get(
        verdict.get("confidence", ""), "❓"
    )
    downloads = metadata.get("downloads") or 0
    likes = metadata.get("likes") or 0
    header = "".join(
        [
            f"{badge} **{verdict.get('architecture') or 'Unknown'}**",
            f"\n\n*Scanned in {result.get('elapsed_s', '?')}s · Stage 1 (config-only)*",
            f"\n\n📥 {downloads:,} downloads · 👍 {likes} likes",
        ]
    )

    # Verdict details
    detail_parts = [
        "### Architecture Confirmation\n",
        f"**Base model**: {verdict.get('base_model_confirmed') or 'Unrecognized'}\n",
        f"**Confidence**: {verdict.get('confidence', 'None')}\n\n",
    ]
    base_matches = evidence.get("base_matches")
    if base_matches:
        detail_parts.append("**Evidence**:\n")
        # Only the first two base matches contribute evidence lines.
        for match in base_matches[:2]:
            detail_parts.extend(f"- {item}\n" for item in match.get("evidence") or [])
        detail_parts.append("\n")
    similar = evidence.get("modelatlas_similar")
    if similar:
        detail_parts.append("**Similar verified models** (ModelAtlas reference):\n")
        # At most three similar models are listed.
        detail_parts.extend(f"- `{entry['model_id']}`\n" for entry in similar[:3])
    details = "".join(detail_parts)

    # Flags
    if flags:
        flag_parts = [f"### ⚠️ {len(flags)} Flag(s) Found\n\n"]
        flag_parts.extend(
            f"**[{flag['type']}]**\n\n{flag['explanation']}\n\n---\n\n" for flag in flags
        )
        flag_text = "".join(flag_parts)
    else:
        flag_text = (
            "### ✅ No Flags\n\nNo suspicious claims detected in model name or metadata."
        )

    return header, details, flag_text
def _normalize_model_id(raw: str) -> str:
    """Reduce user input (a bare ID or a huggingface.co URL) to a model ID."""
    text = (raw or "").strip()
    host = "huggingface.co/"
    if host not in text:
        return text
    path = text.split(host)[-1]
    # Drop any query string or fragment that came along with a pasted URL.
    for separator in ("?", "#"):
        path = path.split(separator, 1)[0]
    # Only the first two path segments (owner/name) identify the repo; pasted
    # page URLs often carry extra segments such as /tree/main or /blob/....
    segments = [part for part in path.split("/") if part]
    return "/".join(segments[:2])


def run_scan(model_id: str) -> tuple[str, str, str, str]:
    """Scan a model and build the four Markdown panels (Gradio callback).

    Args:
        model_id: HuggingFace model ID or a huggingface.co URL, as typed by
            the user.

    Returns:
        A ``(header, details, flags, discovery)`` tuple of Markdown strings.
        For empty input the first element is a prompt and the rest are empty.
        For a failed scan (result contains ``error``) the discovery panel is
        empty and nothing is stored.
    """
    model_id = _normalize_model_id(model_id)
    if not model_id:
        return "Enter a HuggingFace model ID above.", "", "", ""

    result = scan(model_id)
    header, details, flags = format_verdict(result)

    if "error" in result:
        # A failed scan is neither stored nor used for discovery, so the
        # "this scan has been stored" note would be untrue here.
        return header, details, flags, ""

    # Store result for derivative discovery
    store_scan_result(result)

    # Find unattributed derivatives
    base = (result.get("verdict") or {}).get("base_model_confirmed", "")
    derivatives = find_unattributed_derivatives(base, model_id) if base else []

    # Derivative discovery section
    # NOTE(review): the magnifier emoji was reconstructed from mis-encoded
    # text in the source — confirm the exact glyph.
    if derivatives:
        lines = [
            f"### 🔍 {len(derivatives)} Related Models Found Without Attribution\n\n",
            "These models share the same architecture base but don't declare it:\n\n",
        ]
        for entry in derivatives:
            # downloads may be None (NULL in the database); treat it as 0 so
            # the thousands-separator format cannot raise.
            downloads = entry.get("downloads") or 0
            lines.append(f"- `{entry['model_id']}` ({downloads:,} downloads)\n")
        discovery = "".join(lines)
    else:
        discovery = (
            "### 🔍 Derivative Discovery\n\n"
            "This scan has been stored. As similar models are scanned, "
            "derivatives that don't properly attribute their source will appear here."
        )
    return header, details, flags, discovery
# ── Gradio UI ──────────────────────────────────────────────────────────────
# Model IDs offered as one-click examples under the input box.
EXAMPLES: list[str] = [
    "Qwen/Qwen3.5-27B",
    "Jackrong/Qwen3.5-35B-A3B-Claude-4.6-Opus-Reasoning-Distilled",
    "poolside/Laguna-XS.2",
    "deepseek-ai/DeepSeek-R1",
    "mistralai/Mistral-Medium-3.5-128B",
]

# Custom stylesheet handed to gr.Blocks: caps and centres the page width,
# sizes the verdict header and hides the footer element. The empty first and
# last items reproduce the leading and trailing newline of the stylesheet.
CSS = "\n".join(
    (
        "",
        ".gradio-container { max-width: 900px !important; margin: 0 auto; }",
        ".verdict-header { font-size: 1.2em; }",
        "footer { display: none; }",
        "",
    )
)
# NOTE(review): dashes, middle dots and emoji in the UI strings below were
# reconstructed from mis-encoded text in the source, and the Row/Column
# nesting was inferred from the widgets' `scale` arguments because the
# original indentation was lost — confirm both.

# Intro banner rendered at the top of the page.
_INTRO_MD = """
# 🧬 ModelDNA
### The DNA test for AI models — verify provenance before you download
*Powered by ModelAtlas · a RadicalNotion product*
> **Works with:** standard HuggingFace checkpoints (safetensors / PyTorch bin).
> **Not yet supported:** GGUF quantized models, private/gated models. No weight download needed — Stage 1 reads config.json only.
---
"""

# Footer note rendered below the result panels.
_FOOTER_MD = """
---
*Stage 1 (architecture screening): free, unlimited, no weight download needed.*
*Stage 2 (weight-level analysis): coming soon — deeper confirmation.*
*[modeldna.ai](https://modeldna.ai) · [RadicalNotionAI on HF](https://huggingface.co/RadicalNotionAI)*
"""

# `demo` is built at import time, so it exists as a module-level name whether
# or not this file is run as a script.
with gr.Blocks(
    title="ModelDNA — AI Model Provenance",
    theme=gr.themes.Ocean(),
    css=CSS,
) as demo:
    gr.Markdown(_INTRO_MD)

    # Input row: wide textbox plus the scan button.
    with gr.Row():
        model_input = gr.Textbox(
            label="HuggingFace Model ID or URL",
            placeholder="e.g. Qwen/Qwen3.5-27B (not GGUF — use the original checkpoint)",
            scale=4,
        )
        scan_btn = gr.Button("🔬 Scan", variant="primary", scale=1)

    gr.Examples(
        examples=EXAMPLES,
        inputs=model_input,
        label="Try these examples",
    )

    gr.Markdown("---")

    # Result panels, filled by run_scan in this order:
    # header, details, flags, discovery.
    with gr.Row():
        header_out = gr.Markdown(label="Verdict")
    with gr.Row():
        with gr.Column():
            details_out = gr.Markdown(label="Evidence")
        with gr.Column():
            flags_out = gr.Markdown(label="Flags")

    gr.Markdown("---")
    discovery_out = gr.Markdown(label="Derivative Discovery")

    gr.Markdown(_FOOTER_MD)

    # Clicking the button and pressing Enter in the textbox run the same
    # scan; wire both triggers from one place so they cannot drift apart.
    result_panels = [header_out, details_out, flags_out, discovery_out]
    for trigger in (scan_btn.click, model_input.submit):
        trigger(
            fn=run_scan,
            inputs=[model_input],
            outputs=result_panels,
        )

if __name__ == "__main__":
    demo.launch()