# modeldna / app.py
# trohrbaugh: Add GGUF detection with friendly error; clarify scope in UI
# commit bbd83fb (verified)
#!/usr/bin/env python3
"""
modeldna β€” HuggingFace Space
Interactive model provenance scanner.
Replaces the stale RadicalNotionAI/modelatlas-dashboard Space.
Deployed at: https://huggingface.co/spaces/RadicalNotionAI/modeldna
Custom domain: modeldna.ai (via HF Space custom domain setting)
"""
import gradio as gr
import json
import sys
import time
from pathlib import Path
# scan.py is in the same directory as app.py in both local hf_space/ and on HF
sys.path.insert(0, str(Path(__file__).parent))
from scan import scan, KNOWN_BASES
# ── Discovery: find derivatives that may not attribute properly ────────────
def find_unattributed_derivatives(base_match: str, scanned_id: str) -> list[dict]:
    """
    Query the scan results database for models sharing the same base
    that don't declare attribution to their source.

    Args:
        base_match: Confirmed base-model identifier to match against.
        scanned_id: Model ID of the current scan (excluded from results).

    Returns:
        Up to 5 dicts with "model_id", "confirmed_base" and "downloads"
        keys, ordered by downloads descending. Returns [] on any failure
        (missing psycopg2, unreachable DB) — discovery is best-effort.
    """
    try:
        import os
        import psycopg2

        # DSN is env-overridable so the app isn't pinned to one machine's
        # local socket/user; default preserves the original deployment.
        dsn = os.environ.get(
            "MODELDNA_DB_DSN",
            "postgresql:///modelatlas?host=/var/run/postgresql&port=5433&user=tim",
        )
        conn = psycopg2.connect(dsn)
        try:
            cur = conn.cursor()
            # Find models in the scan results that match this base but lack attribution
            # (placeholder query — will be populated as scans accumulate)
            cur.execute("""
                SELECT model_id, confirmed_base, has_attribution, downloads
                FROM modeldna_scans
                WHERE confirmed_base = %s
                  AND model_id != %s
                  AND (has_attribution = false OR has_attribution IS NULL)
                ORDER BY downloads DESC NULLS LAST
                LIMIT 5
            """, (base_match, scanned_id))
            rows = cur.fetchall()
            cur.close()
        finally:
            conn.close()  # release the connection even if the query raises
        return [{"model_id": r[0], "confirmed_base": r[1], "downloads": r[3]} for r in rows]
    except Exception:
        # Optional feature — never let DB problems break a scan.
        return []
def store_scan_result(result: dict) -> None:
    """Store a scan result for future derivative discovery.

    Creates the ``modeldna_scans`` table on first use, then upserts one
    row keyed by ``model_id``. Best-effort: any failure (missing driver,
    unreachable DB, malformed result) is swallowed so the scan itself
    never breaks.

    Args:
        result: Scan result dict; "verdict", "evidence" and "metadata"
            sub-dicts are read if present.
    """
    try:
        import os
        import psycopg2

        # Same env-overridable DSN as find_unattributed_derivatives;
        # default preserves the original single-host deployment.
        dsn = os.environ.get(
            "MODELDNA_DB_DSN",
            "postgresql:///modelatlas?host=/var/run/postgresql&port=5433&user=tim",
        )
        conn = psycopg2.connect(dsn)
        try:
            cur = conn.cursor()
            cur.execute("""
                CREATE TABLE IF NOT EXISTS modeldna_scans (
                    id SERIAL PRIMARY KEY,
                    model_id TEXT UNIQUE,
                    confirmed_base TEXT,
                    confidence TEXT,
                    has_attribution BOOLEAN,
                    flag_count INT,
                    downloads INT,
                    scanned_at TIMESTAMPTZ DEFAULT now()
                )
            """)
            v = result.get("verdict", {})
            m = result.get("metadata", {})
            e = result.get("evidence", {})
            # A declared base in the evidence counts as attribution.
            has_attr = bool(e.get("claimed_base"))
            cur.execute("""
                INSERT INTO modeldna_scans
                (model_id, confirmed_base, confidence, has_attribution, flag_count, downloads)
                VALUES (%s, %s, %s, %s, %s, %s)
                ON CONFLICT (model_id) DO UPDATE
                SET confidence=EXCLUDED.confidence,
                    has_attribution=EXCLUDED.has_attribution,
                    flag_count=EXCLUDED.flag_count,
                    downloads=EXCLUDED.downloads,
                    scanned_at=now()
            """, (
                result.get("model_id"),
                v.get("base_model_confirmed"),
                v.get("confidence"),
                has_attr,
                v.get("flag_count", 0),
                m.get("downloads", 0),
            ))
            conn.commit()
            cur.close()
        finally:
            conn.close()  # release the connection even if the upsert raises
    except Exception:
        pass  # graceful — don't break the scan if storage fails
def format_verdict(result: dict) -> tuple[str, str, str]:
    """Render a scan result into three markdown sections.

    Returns:
        (header, details, flag_text) — verdict banner, evidence details,
        and the flags panel. On error results, a failure banner with the
        error message and an empty flags section.
    """
    if "error" in result:
        return "❌ Scan Failed", f"**Error**: {result['error']}", ""

    verdict = result.get("verdict", {})
    evidence = result.get("evidence", {})
    meta = result.get("metadata", {})

    # ── Header: confidence emoji, architecture, scan stats ──
    emoji_for = {"HIGH": "✅", "MODERATE": "⚠️", "NONE": "❓"}
    badge = emoji_for.get(verdict.get("confidence", ""), "❓")
    header = "\n\n".join([
        f"{badge} **{verdict.get('architecture', 'Unknown')}**",
        f"*Scanned in {result.get('elapsed_s', '?')}s · Stage 1 (config-only)*",
        f"📥 {meta.get('downloads',0):,} downloads · 👍 {meta.get('likes',0)} likes",
    ])

    # ── Details: base confirmation plus supporting evidence ──
    pieces = [
        f"### Architecture Confirmation\n",
        f"**Base model**: {verdict.get('base_model_confirmed', 'Unrecognized')}\n",
        f"**Confidence**: {verdict.get('confidence', 'None')}\n\n",
    ]
    if evidence.get("base_matches"):
        pieces.append("**Evidence**:\n")
        pieces.extend(
            f"- {line}\n"
            for match in evidence["base_matches"][:2]
            for line in match.get("evidence", [])
        )
        pieces.append("\n")
    if evidence.get("modelatlas_similar"):
        pieces.append("**Similar verified models** (ModelAtlas reference):\n")
        pieces.extend(
            f"- `{sim['model_id']}`\n"
            for sim in evidence["modelatlas_similar"][:3]
        )
    details = "".join(pieces)

    # ── Flags: suspicious claims, or an all-clear message ──
    raised = verdict.get("flags", [])
    if raised:
        flag_text = f"### ⚠️ {len(raised)} Flag(s) Found\n\n" + "".join(
            f"**[{item['type']}]**\n\n{item['explanation']}\n\n---\n\n"
            for item in raised
        )
    else:
        flag_text = "### ✅ No Flags\n\nNo suspicious claims detected in model name or metadata."

    return header, details, flag_text
def run_scan(model_id: str) -> tuple[str, str, str, str]:
    """Main scan function called by Gradio.

    Args:
        model_id: HuggingFace repo id ("org/name" or bare "name"), or a
            full huggingface.co URL (sub-paths like /tree/main and query
            strings are stripped).

    Returns:
        (header, details, flags, discovery) markdown strings for the
        four output panels.
    """
    model_id = model_id.strip()
    if not model_id:
        return "Enter a HuggingFace model ID above.", "", "", ""
    # Normalize: handle full URLs. Previously everything after the host was
    # kept, so links like .../org/model/tree/main or ...?not-for-all-audiences
    # produced invalid repo ids — keep only the first two path segments.
    if "huggingface.co/" in model_id:
        tail = model_id.split("huggingface.co/")[-1].strip("/")
        tail = tail.split("?", 1)[0].split("#", 1)[0]
        model_id = "/".join(tail.split("/")[:2])
    result = scan(model_id)
    # Store result for derivative discovery (best-effort, never raises).
    store_scan_result(result)
    # Find unattributed derivatives sharing the confirmed base.
    base = result.get("verdict", {}).get("base_model_confirmed", "")
    derivatives = find_unattributed_derivatives(base, model_id) if base else []
    header, details, flags = format_verdict(result)
    # Derivative discovery section
    if derivatives:
        discovery = f"### 🔍 {len(derivatives)} Related Models Found Without Attribution\n\n"
        discovery += "These models share the same architecture base but don't declare it:\n\n"
        for d in derivatives:
            discovery += f"- `{d['model_id']}` ({d.get('downloads',0):,} downloads)\n"
    else:
        discovery = (
            "### 🔍 Derivative Discovery\n\n"
            "This scan has been stored. As similar models are scanned, "
            "derivatives that don't properly attribute their source will appear here."
        )
    return header, details, flags, discovery
# ── Gradio UI ──────────────────────────────────────────────────────────────
# Example model IDs offered as one-click inputs under the textbox.
EXAMPLES = [
    "Qwen/Qwen3.5-27B",
    "Jackrong/Qwen3.5-35B-A3B-Claude-4.6-Opus-Reasoning-Distilled",
    "poolside/Laguna-XS.2",
    "deepseek-ai/DeepSeek-R1",
    "mistralai/Mistral-Medium-3.5-128B",
]
# Custom CSS: center the app and cap its width; hide the default Gradio footer.
CSS = """
.gradio-container { max-width: 900px !important; margin: 0 auto; }
.verdict-header { font-size: 1.2em; }
footer { display: none; }
"""
with gr.Blocks(
    title="ModelDNA — AI Model Provenance",
    theme=gr.themes.Ocean(),
    css=CSS,
) as demo:
    # Intro banner: product name plus supported/unsupported scope.
    gr.Markdown("""
# 🧬 ModelDNA
### The DNA test for AI models — verify provenance before you download
*Powered by ModelAtlas · a RadicalNotion product*
> **Works with:** standard HuggingFace checkpoints (safetensors / PyTorch bin).
> **Not yet supported:** GGUF quantized models, private/gated models. No weight download needed — Stage 1 reads config.json only.
---
""")
    # Input row: model id/URL textbox plus the scan trigger button.
    with gr.Row():
        model_input = gr.Textbox(
            label="HuggingFace Model ID or URL",
            placeholder="e.g. Qwen/Qwen3.5-27B (not GGUF — use the original checkpoint)",
            scale=4,
        )
        scan_btn = gr.Button("🔬 Scan", variant="primary", scale=1)
    # Clickable example inputs (fills the textbox).
    gr.Examples(
        examples=EXAMPLES,
        inputs=model_input,
        label="Try these examples",
    )
    gr.Markdown("---")
    # Output panels: verdict banner on top, evidence and flags side by side,
    # then the derivative-discovery section below a divider.
    with gr.Row():
        header_out = gr.Markdown(label="Verdict")
    with gr.Row():
        with gr.Column():
            details_out = gr.Markdown(label="Evidence")
        with gr.Column():
            flags_out = gr.Markdown(label="Flags")
    gr.Markdown("---")
    discovery_out = gr.Markdown(label="Derivative Discovery")
    # Footer: stage roadmap and outbound links.
    gr.Markdown("""
---
*Stage 1 (architecture screening): free, unlimited, no weight download needed.*
*Stage 2 (weight-level analysis): coming soon — deeper confirmation.*
*[modeldna.ai](https://modeldna.ai) · [RadicalNotionAI on HF](https://huggingface.co/RadicalNotionAI)*
""")
    # Wire both the button click and textbox Enter to the same handler.
    scan_btn.click(
        fn=run_scan,
        inputs=[model_input],
        outputs=[header_out, details_out, flags_out, discovery_out],
    )
    model_input.submit(
        fn=run_scan,
        inputs=[model_input],
        outputs=[header_out, details_out, flags_out, discovery_out],
    )
if __name__ == "__main__":
    # Start the Gradio server when the module is run as a script.
    demo.launch()