# modeldna / app.py
# trohrbaugh: Add GGUF detection with friendly error; clarify scope in UI
# commit bbd83fb (verified)
#!/usr/bin/env python3
"""
modeldna β€” HuggingFace Space
Interactive model provenance scanner.
Replaces the stale RadicalNotionAI/modelatlas-dashboard Space.
Deployed at: https://huggingface.co/spaces/RadicalNotionAI/modeldna
Custom domain: modeldna.ai (via HF Space custom domain setting)
"""
import gradio as gr
import json
import sys
import time
from pathlib import Path
# scan.py is in the same directory as app.py in both local hf_space/ and on HF
sys.path.insert(0, str(Path(__file__).parent))
from scan import scan, KNOWN_BASES
# ── Discovery: find derivatives that may not attribute properly ────────────
def find_unattributed_derivatives(base_match: str, scanned_id: str) -> list[dict]:
    """
    Query the scan results database for models sharing the same base
    that don't declare attribution to their source.

    Args:
        base_match: Confirmed base-model identifier to match against.
        scanned_id: Model ID of the current scan (excluded from results).

    Returns:
        Up to 5 dicts with "model_id", "confirmed_base" and "downloads"
        keys, ordered by downloads descending. Returns [] on any failure
        (missing psycopg2, unreachable DB) — discovery is best-effort.
    """
    try:
        import os
        import psycopg2

        # DSN is env-overridable so the app isn't pinned to one machine's
        # local socket/user; default preserves the original deployment.
        dsn = os.environ.get(
            "MODELDNA_DB_DSN",
            "postgresql:///modelatlas?host=/var/run/postgresql&port=5433&user=tim",
        )
        conn = psycopg2.connect(dsn)
        try:
            cur = conn.cursor()
            # Find models in the scan results that match this base but lack attribution
            # (placeholder query — will be populated as scans accumulate)
            cur.execute("""
                SELECT model_id, confirmed_base, has_attribution, downloads
                FROM modeldna_scans
                WHERE confirmed_base = %s
                  AND model_id != %s
                  AND (has_attribution = false OR has_attribution IS NULL)
                ORDER BY downloads DESC NULLS LAST
                LIMIT 5
            """, (base_match, scanned_id))
            rows = cur.fetchall()
            cur.close()
        finally:
            conn.close()  # release the connection even if the query raises
        return [{"model_id": r[0], "confirmed_base": r[1], "downloads": r[3]} for r in rows]
    except Exception:
        # Optional feature — never let DB problems break a scan.
        return []
def store_scan_result(result: dict) -> None:
    """Store a scan result for future derivative discovery.

    Creates the ``modeldna_scans`` table on first use, then upserts one
    row keyed by ``model_id``. Best-effort: any failure (missing driver,
    unreachable DB, malformed result) is swallowed so the scan itself
    never breaks.

    Args:
        result: Scan result dict; "verdict", "evidence" and "metadata"
            sub-dicts are read if present.
    """
    try:
        import os
        import psycopg2

        # Same env-overridable DSN as find_unattributed_derivatives;
        # default preserves the original single-host deployment.
        dsn = os.environ.get(
            "MODELDNA_DB_DSN",
            "postgresql:///modelatlas?host=/var/run/postgresql&port=5433&user=tim",
        )
        conn = psycopg2.connect(dsn)
        try:
            cur = conn.cursor()
            cur.execute("""
                CREATE TABLE IF NOT EXISTS modeldna_scans (
                    id SERIAL PRIMARY KEY,
                    model_id TEXT UNIQUE,
                    confirmed_base TEXT,
                    confidence TEXT,
                    has_attribution BOOLEAN,
                    flag_count INT,
                    downloads INT,
                    scanned_at TIMESTAMPTZ DEFAULT now()
                )
            """)
            v = result.get("verdict", {})
            m = result.get("metadata", {})
            e = result.get("evidence", {})
            # A declared base in the evidence counts as attribution.
            has_attr = bool(e.get("claimed_base"))
            cur.execute("""
                INSERT INTO modeldna_scans
                (model_id, confirmed_base, confidence, has_attribution, flag_count, downloads)
                VALUES (%s, %s, %s, %s, %s, %s)
                ON CONFLICT (model_id) DO UPDATE
                SET confidence=EXCLUDED.confidence,
                    has_attribution=EXCLUDED.has_attribution,
                    flag_count=EXCLUDED.flag_count,
                    downloads=EXCLUDED.downloads,
                    scanned_at=now()
            """, (
                result.get("model_id"),
                v.get("base_model_confirmed"),
                v.get("confidence"),
                has_attr,
                v.get("flag_count", 0),
                m.get("downloads", 0),
            ))
            conn.commit()
            cur.close()
        finally:
            conn.close()  # release the connection even if the upsert raises
    except Exception:
        pass  # graceful — don't break the scan if storage fails
def format_verdict(result: dict) -> tuple[str, str, str]:
    """Render a scan result into three markdown sections.

    Returns:
        (header, details, flag_text) — verdict banner, evidence details,
        and the flags panel. On error results, a failure banner with the
        error message and an empty flags section.
    """
    if "error" in result:
        return "❌ Scan Failed", f"**Error**: {result['error']}", ""

    verdict = result.get("verdict", {})
    evidence = result.get("evidence", {})
    meta = result.get("metadata", {})

    # ── Header: confidence emoji, architecture, scan stats ──
    emoji_for = {"HIGH": "✅", "MODERATE": "⚠️", "NONE": "❓"}
    badge = emoji_for.get(verdict.get("confidence", ""), "❓")
    header = "\n\n".join([
        f"{badge} **{verdict.get('architecture', 'Unknown')}**",
        f"*Scanned in {result.get('elapsed_s', '?')}s · Stage 1 (config-only)*",
        f"📥 {meta.get('downloads',0):,} downloads · 👍 {meta.get('likes',0)} likes",
    ])

    # ── Details: base confirmation plus supporting evidence ──
    pieces = [
        f"### Architecture Confirmation\n",
        f"**Base model**: {verdict.get('base_model_confirmed', 'Unrecognized')}\n",
        f"**Confidence**: {verdict.get('confidence', 'None')}\n\n",
    ]
    if evidence.get("base_matches"):
        pieces.append("**Evidence**:\n")
        pieces.extend(
            f"- {line}\n"
            for match in evidence["base_matches"][:2]
            for line in match.get("evidence", [])
        )
        pieces.append("\n")
    if evidence.get("modelatlas_similar"):
        pieces.append("**Similar verified models** (ModelAtlas reference):\n")
        pieces.extend(
            f"- `{sim['model_id']}`\n"
            for sim in evidence["modelatlas_similar"][:3]
        )
    details = "".join(pieces)

    # ── Flags: suspicious claims, or an all-clear message ──
    raised = verdict.get("flags", [])
    if raised:
        flag_text = f"### ⚠️ {len(raised)} Flag(s) Found\n\n" + "".join(
            f"**[{item['type']}]**\n\n{item['explanation']}\n\n---\n\n"
            for item in raised
        )
    else:
        flag_text = "### ✅ No Flags\n\nNo suspicious claims detected in model name or metadata."

    return header, details, flag_text
def run_scan(model_id: str) -> tuple[str, str, str, str]:
    """Main scan function called by Gradio.

    Args:
        model_id: HuggingFace repo id ("org/name" or bare "name"), or a
            full huggingface.co URL (sub-paths like /tree/main and query
            strings are stripped).

    Returns:
        (header, details, flags, discovery) markdown strings for the
        four output panels.
    """
    model_id = model_id.strip()
    if not model_id:
        return "Enter a HuggingFace model ID above.", "", "", ""
    # Normalize: handle full URLs. Previously everything after the host was
    # kept, so links like .../org/model/tree/main or ...?not-for-all-audiences
    # produced invalid repo ids — keep only the first two path segments.
    if "huggingface.co/" in model_id:
        tail = model_id.split("huggingface.co/")[-1].strip("/")
        tail = tail.split("?", 1)[0].split("#", 1)[0]
        model_id = "/".join(tail.split("/")[:2])
    result = scan(model_id)
    # Store result for derivative discovery (best-effort, never raises).
    store_scan_result(result)
    # Find unattributed derivatives sharing the confirmed base.
    base = result.get("verdict", {}).get("base_model_confirmed", "")
    derivatives = find_unattributed_derivatives(base, model_id) if base else []
    header, details, flags = format_verdict(result)
    # Derivative discovery section
    if derivatives:
        discovery = f"### 🔍 {len(derivatives)} Related Models Found Without Attribution\n\n"
        discovery += "These models share the same architecture base but don't declare it:\n\n"
        for d in derivatives:
            discovery += f"- `{d['model_id']}` ({d.get('downloads',0):,} downloads)\n"
    else:
        discovery = (
            "### 🔍 Derivative Discovery\n\n"
            "This scan has been stored. As similar models are scanned, "
            "derivatives that don't properly attribute their source will appear here."
        )
    return header, details, flags, discovery
# ── Gradio UI ──────────────────────────────────────────────────────────────
# Example model IDs offered as one-click inputs under the textbox.
EXAMPLES = [
    "Qwen/Qwen3.5-27B",
    "Jackrong/Qwen3.5-35B-A3B-Claude-4.6-Opus-Reasoning-Distilled",
    "poolside/Laguna-XS.2",
    "deepseek-ai/DeepSeek-R1",
    "mistralai/Mistral-Medium-3.5-128B",
]
# Custom CSS: center the app and cap its width; hide the default Gradio footer.
CSS = """
.gradio-container { max-width: 900px !important; margin: 0 auto; }
.verdict-header { font-size: 1.2em; }
footer { display: none; }
"""
with gr.Blocks(
    title="ModelDNA — AI Model Provenance",
    theme=gr.themes.Ocean(),
    css=CSS,
) as demo:
    # Intro banner: product name plus supported/unsupported scope.
    gr.Markdown("""
# 🧬 ModelDNA
### The DNA test for AI models — verify provenance before you download
*Powered by ModelAtlas · a RadicalNotion product*
> **Works with:** standard HuggingFace checkpoints (safetensors / PyTorch bin).
> **Not yet supported:** GGUF quantized models, private/gated models. No weight download needed — Stage 1 reads config.json only.
---
""")
    # Input row: model id/URL textbox plus the scan trigger button.
    with gr.Row():
        model_input = gr.Textbox(
            label="HuggingFace Model ID or URL",
            placeholder="e.g. Qwen/Qwen3.5-27B (not GGUF — use the original checkpoint)",
            scale=4,
        )
        scan_btn = gr.Button("🔬 Scan", variant="primary", scale=1)
    # Clickable example inputs (fills the textbox).
    gr.Examples(
        examples=EXAMPLES,
        inputs=model_input,
        label="Try these examples",
    )
    gr.Markdown("---")
    # Output panels: verdict banner on top, evidence and flags side by side,
    # then the derivative-discovery section below a divider.
    with gr.Row():
        header_out = gr.Markdown(label="Verdict")
    with gr.Row():
        with gr.Column():
            details_out = gr.Markdown(label="Evidence")
        with gr.Column():
            flags_out = gr.Markdown(label="Flags")
    gr.Markdown("---")
    discovery_out = gr.Markdown(label="Derivative Discovery")
    # Footer: stage roadmap and outbound links.
    gr.Markdown("""
---
*Stage 1 (architecture screening): free, unlimited, no weight download needed.*
*Stage 2 (weight-level analysis): coming soon — deeper confirmation.*
*[modeldna.ai](https://modeldna.ai) · [RadicalNotionAI on HF](https://huggingface.co/RadicalNotionAI)*
""")
    # Wire both the button click and textbox Enter to the same handler.
    scan_btn.click(
        fn=run_scan,
        inputs=[model_input],
        outputs=[header_out, details_out, flags_out, discovery_out],
    )
    model_input.submit(
        fn=run_scan,
        inputs=[model_input],
        outputs=[header_out, details_out, flags_out, discovery_out],
    )
if __name__ == "__main__":
    # Start the Gradio server when the module is run as a script.
    demo.launch()