#!/usr/bin/env python3
"""
modeldna — HuggingFace Space
Interactive model provenance scanner.
Replaces the stale RadicalNotionAI/modelatlas-dashboard Space.
Deployed at: https://huggingface.co/spaces/RadicalNotionAI/modeldna
Custom domain: modeldna.ai (via HF Space custom domain setting)
"""
import gradio as gr
import json
import sys
import time
from pathlib import Path

# scan.py is in the same directory as app.py in both local hf_space/ and on HF
sys.path.insert(0, str(Path(__file__).parent))
from scan import scan, KNOWN_BASES

# Single source of truth for the scan-results database DSN (previously
# duplicated verbatim in two functions).  Unix-socket connection, no password.
_DB_DSN = "postgresql:///modelatlas?host=/var/run/postgresql&port=5433&user=tim"


# ── Discovery: find derivatives that may not attribute properly ────────────


def find_unattributed_derivatives(base_match: str, scanned_id: str) -> list[dict]:
    """
    Query the scan results database for models sharing the same base that
    don't declare attribution to their source.

    Args:
        base_match: confirmed base architecture to match against.
        scanned_id: model ID of the current scan (excluded from results).

    Returns:
        Up to 5 dicts with keys ``model_id`` / ``confirmed_base`` /
        ``downloads``, highest-download first.  Empty list on any failure —
        discovery is a bonus feature and must never break a scan.
    """
    try:
        import psycopg2  # deferred: DB access is optional on this Space

        conn = psycopg2.connect(_DB_DSN)
        try:
            # Cursor context manager guarantees cursor cleanup; the outer
            # try/finally guarantees the connection is released even when the
            # query raises (the old code leaked it on error).
            with conn.cursor() as cur:
                # Find models in the scan results that match this base but lack attribution
                # (placeholder query — will be populated as scans accumulate)
                cur.execute("""
                    SELECT model_id, confirmed_base, has_attribution, downloads
                    FROM modeldna_scans
                    WHERE confirmed_base = %s
                      AND model_id != %s
                      AND (has_attribution = false OR has_attribution IS NULL)
                    ORDER BY downloads DESC NULLS LAST
                    LIMIT 5
                """, (base_match, scanned_id))
                rows = cur.fetchall()
        finally:
            conn.close()
        return [
            {"model_id": r[0], "confirmed_base": r[1], "downloads": r[3]}
            for r in rows
        ]
    except Exception:
        # Deliberate best-effort: no DB, no psycopg2, bad schema — all fine.
        return []


def store_scan_result(result: dict) -> None:
    """Store a scan result for future derivative discovery.

    Creates the ``modeldna_scans`` table on first use, then upserts by
    ``model_id``.  Best-effort: every failure is swallowed so storage
    problems never break the user-facing scan.
    """
    try:
        import psycopg2  # deferred: DB access is optional on this Space

        conn = psycopg2.connect(_DB_DSN)
        try:
            with conn.cursor() as cur:
                cur.execute("""
                    CREATE TABLE IF NOT EXISTS modeldna_scans (
                        id SERIAL PRIMARY KEY,
                        model_id TEXT UNIQUE,
                        confirmed_base TEXT,
                        confidence TEXT,
                        has_attribution BOOLEAN,
                        flag_count INT,
                        downloads INT,
                        scanned_at TIMESTAMPTZ DEFAULT now()
                    )
                """)
                v = result.get("verdict", {})
                m = result.get("metadata", {})
                e = result.get("evidence", {})
                # A declared base in the scan evidence counts as attribution.
                has_attr = bool(e.get("claimed_base"))
                cur.execute("""
                    INSERT INTO modeldna_scans
                        (model_id, confirmed_base, confidence, has_attribution, flag_count, downloads)
                    VALUES (%s, %s, %s, %s, %s, %s)
                    ON CONFLICT (model_id) DO UPDATE SET
                        confidence=EXCLUDED.confidence,
                        has_attribution=EXCLUDED.has_attribution,
                        flag_count=EXCLUDED.flag_count,
                        downloads=EXCLUDED.downloads,
                        scanned_at=now()
                """, (
                    result.get("model_id"),
                    v.get("base_model_confirmed"),
                    v.get("confidence"),
                    has_attr,
                    v.get("flag_count", 0),
                    m.get("downloads", 0),
                ))
            conn.commit()
        finally:
            # Always release the connection, even if execute/commit raised
            # (the old code leaked it on error).
            conn.close()
    except Exception:
        pass  # graceful — don't break the scan if storage fails


def format_verdict(result: dict) -> tuple[str, str, str]:
    """Format scan result into three UI sections.

    Args:
        result: dict produced by ``scan.scan`` — expected keys include
            ``verdict``, ``evidence``, ``metadata``, ``elapsed_s``, and
            optionally ``error``.

    Returns:
        (header_markdown, details_markdown, flags_markdown).
    """
    if "error" in result:
        return (
            "❌ Scan Failed",
            f"**Error**: {result['error']}",
            ""
        )

    v = result.get("verdict", {})
    e = result.get("evidence", {})
    m = result.get("metadata", {})
    flags = v.get("flags", [])

    # Header: confidence badge + timing + popularity stats.
    confidence_emoji = {"HIGH": "✅", "MODERATE": "⚠️", "NONE": "❓"}.get(v.get("confidence", ""), "❓")
    header = f"{confidence_emoji} **{v.get('architecture', 'Unknown')}**"
    header += f"\n\n*Scanned in {result.get('elapsed_s', '?')}s · Stage 1 (config-only)*"
    header += f"\n\n📥 {m.get('downloads',0):,} downloads · 👍 {m.get('likes',0)} likes"

    # Verdict details: confirmed base, confidence, then supporting evidence.
    details = f"### Architecture Confirmation\n"
    details += f"**Base model**: {v.get('base_model_confirmed', 'Unrecognized')}\n"
    details += f"**Confidence**: {v.get('confidence', 'None')}\n\n"
    if e.get("base_matches"):
        details += "**Evidence**:\n"
        # Only the top two base matches to keep the panel compact.
        for bm in e["base_matches"][:2]:
            for ev in bm.get("evidence", []):
                details += f"- {ev}\n"
        details += "\n"
    if e.get("modelatlas_similar"):
        details += "**Similar verified models** (ModelAtlas reference):\n"
        for s in e["modelatlas_similar"][:3]:
            details += f"- `{s['model_id']}`\n"

    # Flags: one section per suspicious claim, or an all-clear message.
    flag_text = ""
    if flags:
        flag_text = f"### ⚠️ {len(flags)} Flag(s) Found\n\n"
        for f in flags:
            flag_text += f"**[{f['type']}]**\n\n{f['explanation']}\n\n---\n\n"
    else:
        flag_text = "### ✅ No Flags\n\nNo suspicious claims detected in model name or metadata."

    return header, details, flag_text


def run_scan(model_id: str) -> tuple[str, str, str, str]:
    """Main scan function called by Gradio.

    Args:
        model_id: HuggingFace model ID (``org/name``) or a full
            huggingface.co URL pasted by the user.

    Returns:
        (header, details, flags, discovery) markdown strings for the four
        output panels.
    """
    model_id = model_id.strip()
    if not model_id:
        return "Enter a HuggingFace model ID above.", "", "", ""

    # Normalize: handle full URLs, dropping any trailing slash and any
    # query string / fragment the browser may have appended.
    if "huggingface.co/" in model_id:
        model_id = model_id.split("huggingface.co/")[-1]
        model_id = model_id.split("?")[0].split("#")[0].strip("/")

    result = scan(model_id)

    # Store result for derivative discovery
    store_scan_result(result)

    # Find unattributed derivatives (only meaningful when a base was confirmed)
    base = result.get("verdict", {}).get("base_model_confirmed", "")
    derivatives = find_unattributed_derivatives(base, model_id) if base else []

    header, details, flags = format_verdict(result)

    # Derivative discovery section
    discovery = ""
    if derivatives:
        discovery = f"### 🔍 {len(derivatives)} Related Models Found Without Attribution\n\n"
        discovery += "These models share the same architecture base but don't declare it:\n\n"
        for d in derivatives:
            discovery += f"- `{d['model_id']}` ({d.get('downloads',0):,} downloads)\n"
    else:
        discovery = (
            "### 🔍 Derivative Discovery\n\n"
            "This scan has been stored. As similar models are scanned, "
            "derivatives that don't properly attribute their source will appear here."
        )

    return header, details, flags, discovery


# ── Gradio UI ──────────────────────────────────────────────────────────────

EXAMPLES = [
    "Qwen/Qwen3.5-27B",
    "Jackrong/Qwen3.5-35B-A3B-Claude-4.6-Opus-Reasoning-Distilled",
    "poolside/Laguna-XS.2",
    "deepseek-ai/DeepSeek-R1",
    "mistralai/Mistral-Medium-3.5-128B",
]

CSS = """
.gradio-container { max-width: 900px !important; margin: 0 auto; }
.verdict-header { font-size: 1.2em; }
footer { display: none; }
"""

with gr.Blocks(
    title="ModelDNA — AI Model Provenance",
    theme=gr.themes.Ocean(),
    css=CSS,
) as demo:
    gr.Markdown("""
    # 🧬 ModelDNA
    ### The DNA test for AI models — verify provenance before you download
    *Powered by ModelAtlas · a RadicalNotion product*

    > **Works with:** standard HuggingFace checkpoints (safetensors / PyTorch bin).
    > **Not yet supported:** GGUF quantized models, private/gated models.

    No weight download needed — Stage 1 reads config.json only.

    ---
    """)

    with gr.Row():
        model_input = gr.Textbox(
            label="HuggingFace Model ID or URL",
            placeholder="e.g. Qwen/Qwen3.5-27B (not GGUF — use the original checkpoint)",
            scale=4,
        )
        scan_btn = gr.Button("🔬 Scan", variant="primary", scale=1)

    gr.Examples(
        examples=EXAMPLES,
        inputs=model_input,
        label="Try these examples",
    )

    gr.Markdown("---")

    with gr.Row():
        header_out = gr.Markdown(label="Verdict")
    with gr.Row():
        with gr.Column():
            details_out = gr.Markdown(label="Evidence")
        with gr.Column():
            flags_out = gr.Markdown(label="Flags")

    gr.Markdown("---")
    discovery_out = gr.Markdown(label="Derivative Discovery")

    gr.Markdown("""
    ---
    *Stage 1 (architecture screening): free, unlimited, no weight download needed.*
    *Stage 2 (weight-level analysis): coming soon — deeper confirmation.*

    *[modeldna.ai](https://modeldna.ai) · [RadicalNotionAI on HF](https://huggingface.co/RadicalNotionAI)*
    """)

    # Both the button and pressing Enter in the textbox trigger the same scan.
    scan_btn.click(
        fn=run_scan,
        inputs=[model_input],
        outputs=[header_out, details_out, flags_out, discovery_out],
    )
    model_input.submit(
        fn=run_scan,
        inputs=[model_input],
        outputs=[header_out, details_out, flags_out, discovery_out],
    )


if __name__ == "__main__":
    demo.launch()