File size: 9,259 Bytes
b210edb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
340004c
b210edb
 
 
 
 
 
bbd83fb
 
 
b210edb
 
 
 
 
bbd83fb
 
b210edb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
#!/usr/bin/env python3
"""
modeldna — HuggingFace Space
Interactive model provenance scanner.
Replaces the stale RadicalNotionAI/modelatlas-dashboard Space.

Deployed at: https://huggingface.co/spaces/RadicalNotionAI/modeldna
Custom domain: modeldna.ai (via HF Space custom domain setting)
"""
import gradio as gr
import json
import sys
import time
from pathlib import Path

# scan.py is in the same directory as app.py in both local hf_space/ and on HF
sys.path.insert(0, str(Path(__file__).parent))
from scan import scan, KNOWN_BASES

# ── Discovery: find derivatives that may not attribute properly ────────────

def find_unattributed_derivatives(base_match: str, scanned_id: str) -> list[dict]:
    """
    Look up previously scanned models that share a base but lack attribution.

    Queries the ``modeldna_scans`` table for rows whose ``confirmed_base``
    equals *base_match*, excluding *scanned_id* itself, where
    ``has_attribution`` is false or NULL. At most five rows are returned,
    ordered by download count (rows with NULL downloads last).

    Args:
        base_match: Confirmed base model to match against ``confirmed_base``.
        scanned_id: Model ID of the scan in progress; excluded from results.

    Returns:
        A list of dicts with keys ``model_id``, ``confirmed_base`` and
        ``downloads`` (``downloads`` may be ``None`` when the column is NULL).
        Returns an empty list on any failure (driver missing, database
        unreachable, table not created yet): discovery is best-effort and
        must never break a scan.
    """
    import logging

    try:
        # Imported lazily so the app still starts where the driver is absent.
        import psycopg2

        conn = psycopg2.connect(
            "postgresql:///modelatlas?host=/var/run/postgresql&port=5433&user=tim"
        )
        # try/finally guarantees the connection and cursor are released even
        # when the query itself raises (e.g. the table does not exist yet).
        try:
            cur = conn.cursor()
            try:
                # Find models in the scan results that match this base but
                # lack attribution (placeholder query - will be populated as
                # scans accumulate).
                cur.execute("""
            SELECT model_id, confirmed_base, has_attribution, downloads
            FROM modeldna_scans
            WHERE confirmed_base = %s
              AND model_id != %s
              AND (has_attribution = false OR has_attribution IS NULL)
            ORDER BY downloads DESC NULLS LAST
            LIMIT 5
        """, (base_match, scanned_id))
                rows = cur.fetchall()
            finally:
                cur.close()
        finally:
            conn.close()
    except Exception:
        # Best-effort feature: record the reason for debugging, report nothing.
        logging.getLogger(__name__).debug(
            "derivative lookup failed for base %r", base_match, exc_info=True
        )
        return []

    return [
        {"model_id": model_id, "confirmed_base": confirmed_base, "downloads": downloads}
        for model_id, confirmed_base, _has_attribution, downloads in rows
    ]


def store_scan_result(result: dict) -> None:
    """
    Persist one scan result so later scans can discover related models.

    Creates the ``modeldna_scans`` table if it does not exist, then upserts a
    row keyed on ``model_id``. ``has_attribution`` is derived from whether the
    result's ``evidence`` carries a truthy ``claimed_base``.

    Results that carry an ``error`` key, or that have no ``model_id``, are not
    stored: upserting them would overwrite a previously good row with empty
    defaults (``has_attribution=false``, zero downloads).

    Args:
        result: Scan result dict; the ``model_id``, ``verdict``, ``metadata``
            and ``evidence`` keys are read.

    Returns:
        None. Every failure (driver missing, database unreachable, malformed
        result) is swallowed after debug logging, so storage problems never
        break the scan itself.
    """
    import logging

    try:
        model_id = result.get("model_id")
        if "error" in result or not model_id:
            return

        # Imported lazily so the app still starts where the driver is absent.
        import psycopg2

        v = result.get("verdict") or {}
        m = result.get("metadata") or {}
        e = result.get("evidence") or {}
        has_attr = bool(e.get("claimed_base"))

        conn = psycopg2.connect(
            "postgresql:///modelatlas?host=/var/run/postgresql&port=5433&user=tim"
        )
        # try/finally guarantees the connection and cursor are released even
        # when one of the statements raises.
        try:
            cur = conn.cursor()
            try:
                cur.execute("""
            CREATE TABLE IF NOT EXISTS modeldna_scans (
                id SERIAL PRIMARY KEY,
                model_id TEXT UNIQUE,
                confirmed_base TEXT,
                confidence TEXT,
                has_attribution BOOLEAN,
                flag_count INT,
                downloads INT,
                scanned_at TIMESTAMPTZ DEFAULT now()
            )
        """)
                # confirmed_base is refreshed on conflict as well; otherwise a
                # re-scan would keep whatever base the first scan recorded.
                cur.execute("""
            INSERT INTO modeldna_scans
              (model_id, confirmed_base, confidence, has_attribution, flag_count, downloads)
            VALUES (%s, %s, %s, %s, %s, %s)
            ON CONFLICT (model_id) DO UPDATE
              SET confirmed_base=EXCLUDED.confirmed_base,
                  confidence=EXCLUDED.confidence,
                  has_attribution=EXCLUDED.has_attribution,
                  flag_count=EXCLUDED.flag_count,
                  downloads=EXCLUDED.downloads,
                  scanned_at=now()
        """, (
                    model_id,
                    v.get("base_model_confirmed"),
                    v.get("confidence"),
                    has_attr,
                    v.get("flag_count", 0),
                    m.get("downloads", 0),
                ))
                conn.commit()
            finally:
                cur.close()
        finally:
            conn.close()
    except Exception:
        # graceful - don't break the scan if storage fails
        logging.getLogger(__name__).debug("storing scan result failed", exc_info=True)


def format_verdict(result: dict) -> tuple[str, str, str]:
    """
    Format a scan result into the three Markdown sections shown in the UI.

    Args:
        result: Scan result dict. If it contains an ``error`` key, the error
            view is returned. Otherwise the ``verdict``, ``evidence``,
            ``metadata`` and ``elapsed_s`` keys are read; missing or ``None``
            values fall back to placeholders instead of raising.

    Returns:
        A ``(header, details, flag_text)`` tuple of Markdown strings:
        the headline with architecture and popularity, the base-model
        confirmation with supporting evidence, and the list of flags.
    """
    if "error" in result:
        return (
            "❌ Scan Failed",
            f"**Error**: {result['error']}",
            "",
        )

    # `or {}` rather than a .get() default: a key that is present with a
    # None value must also fall back to an empty container.
    verdict = result.get("verdict") or {}
    evidence = result.get("evidence") or {}
    metadata = result.get("metadata") or {}
    flags = verdict.get("flags") or []

    # Header
    badge = {"HIGH": "✅", "MODERATE": "⚠️", "NONE": "❓"}.get(
        verdict.get("confidence", ""), "❓"
    )
    # None counts would crash the thousands-separator format spec below.
    downloads = metadata.get("downloads") or 0
    likes = metadata.get("likes") or 0
    header = "\n\n".join([
        f"{badge} **{verdict.get('architecture') or 'Unknown'}**",
        f"*Scanned in {result.get('elapsed_s', '?')}s · Stage 1 (config-only)*",
        f"📥 {downloads:,} downloads · 👍 {likes} likes",
    ])

    # Verdict details
    detail_parts = [
        "### Architecture Confirmation\n",
        f"**Base model**: {verdict.get('base_model_confirmed') or 'Unrecognized'}\n",
        f"**Confidence**: {verdict.get('confidence') or 'None'}\n\n",
    ]

    base_matches = evidence.get("base_matches")
    if base_matches:
        detail_parts.append("**Evidence**:\n")
        # Only the first two base matches contribute evidence lines.
        detail_parts.extend(
            f"- {item}\n"
            for match in base_matches[:2]
            for item in match.get("evidence") or []
        )
    detail_parts.append("\n")

    similar = evidence.get("modelatlas_similar")
    if similar:
        detail_parts.append("**Similar verified models** (ModelAtlas reference):\n")
        # Only the first three similar models are listed.
        detail_parts.extend(f"- `{entry['model_id']}`\n" for entry in similar[:3])

    details = "".join(detail_parts)

    # Flags
    if flags:
        flag_text = f"### ⚠️ {len(flags)} Flag(s) Found\n\n" + "".join(
            f"**[{flag['type']}]**\n\n{flag['explanation']}\n\n---\n\n"
            for flag in flags
        )
    else:
        flag_text = "### ✅ No Flags\n\nNo suspicious claims detected in model name or metadata."

    return header, details, flag_text


# Path segments the Hub appends after a repo ID on its web pages
# (file browser, raw downloads, discussions, history).
_HUB_VIEW_SEGMENTS = frozenset(
    {"tree", "blob", "resolve", "raw", "discussions", "commits", "commit"}
)


def _normalize_model_id(raw: str) -> str:
    """Reduce user input (a bare model ID or a huggingface.co URL) to a model ID."""
    text = (raw or "").strip()
    if "huggingface.co/" not in text:
        return text

    path = text.split("huggingface.co/")[-1]
    # Query strings and fragments are never part of a repo ID.
    for separator in ("?", "#"):
        path = path.split(separator, 1)[0]
    segments = [segment for segment in path.split("/") if segment]

    # Cut at the first page-view segment. The scan starts at index 1 so an
    # owner that happens to be called e.g. "raw" is not discarded.
    for index in range(1, len(segments)):
        if segments[index] in _HUB_VIEW_SEGMENTS:
            segments = segments[:index]
            break

    # A repo ID is at most "owner/name".
    return "/".join(segments[:2])


def run_scan(model_id: str) -> tuple[str, str, str, str]:
    """
    Main scan function called by Gradio.

    Normalizes the input, runs the scan, stores the result, looks up related
    models that lack attribution, and formats everything for display.

    Args:
        model_id: A HuggingFace model ID or a huggingface.co URL, as typed by
            the user. Trailing page paths (``/tree/main``), query strings and
            fragments on URLs are discarded.

    Returns:
        A ``(header, details, flags, discovery)`` tuple of Markdown strings,
        one per output panel. When the input is empty (or is a URL with no
        repo path), the first element is a prompt and the rest are empty.
    """
    model_id = _normalize_model_id(model_id)
    if not model_id:
        return "Enter a HuggingFace model ID above.", "", "", ""

    result = scan(model_id)

    # Store result for derivative discovery
    store_scan_result(result)

    # Find unattributed derivatives
    base = (result.get("verdict") or {}).get("base_model_confirmed") or ""
    derivatives = find_unattributed_derivatives(base, model_id) if base else []

    header, details, flags = format_verdict(result)

    # Derivative discovery section
    if derivatives:
        discovery = f"### 🔍 {len(derivatives)} Related Models Found Without Attribution\n\n"
        discovery += "These models share the same architecture base but don't declare it:\n\n"
        for d in derivatives:
            # The stored download count can be NULL/None, which the
            # thousands-separator format spec cannot render.
            downloads = d.get("downloads") or 0
            discovery += f"- `{d['model_id']}` ({downloads:,} downloads)\n"
    else:
        discovery = (
            "### 🔍 Derivative Discovery\n\n"
            "This scan has been stored. As similar models are scanned, "
            "derivatives that don't properly attribute their source will appear here."
        )

    return header, details, flags, discovery


# ── Gradio UI ──────────────────────────────────────────────────────────────

# Model IDs offered as one-click inputs by the gr.Examples widget.
EXAMPLES = [
    "Qwen/Qwen3.5-27B",
    "Jackrong/Qwen3.5-35B-A3B-Claude-4.6-Opus-Reasoning-Distilled",
    "poolside/Laguna-XS.2",
    "deepseek-ai/DeepSeek-R1",
    "mistralai/Mistral-Medium-3.5-128B",
]

# Stylesheet handed to gr.Blocks(css=...): caps and centers the page width,
# enlarges elements with the "verdict-header" class, and hides the footer.
CSS = (
    "\n"
    ".gradio-container { max-width: 900px !important; margin: 0 auto; }\n"
    ".verdict-header { font-size: 1.2em; }\n"
    "footer { display: none; }\n"
)

# Page layout: intro text, input row, example picker, result panels, footer.
with gr.Blocks(
    title="ModelDNA — AI Model Provenance",
    theme=gr.themes.Ocean(),
    css=CSS,
) as demo:
    gr.Markdown("""
    # 🧬 ModelDNA
    ### The DNA test for AI models — verify provenance before you download
    *Powered by ModelAtlas · a RadicalNotion product*

    > **Works with:** standard HuggingFace checkpoints (safetensors / PyTorch bin).
    > **Not yet supported:** GGUF quantized models, private/gated models. No weight download needed — Stage 1 reads config.json only.
    ---
    """)

    with gr.Row():
        model_input = gr.Textbox(
            label="HuggingFace Model ID or URL",
            placeholder="e.g. Qwen/Qwen3.5-27B  (not GGUF — use the original checkpoint)",
            scale=4,
        )
        scan_btn = gr.Button("🔬 Scan", variant="primary", scale=1)

    gr.Examples(
        examples=EXAMPLES,
        inputs=model_input,
        label="Try these examples",
    )

    gr.Markdown("---")

    with gr.Row():
        header_out = gr.Markdown(label="Verdict")
    with gr.Row():
        with gr.Column():
            details_out = gr.Markdown(label="Evidence")
        with gr.Column():
            flags_out = gr.Markdown(label="Flags")

    gr.Markdown("---")
    discovery_out = gr.Markdown(label="Derivative Discovery")

    gr.Markdown("""
    ---
    *Stage 1 (architecture screening): free, unlimited, no weight download needed.*
    *Stage 2 (weight-level analysis): coming soon — deeper confirmation.*
    *[modeldna.ai](https://modeldna.ai) · [RadicalNotionAI on HF](https://huggingface.co/RadicalNotionAI)*
    """)

    # Output panels in the order run_scan returns its four strings.
    scan_outputs = [header_out, details_out, flags_out, discovery_out]

    # Clicking the button and pressing Enter in the textbox run the same scan.
    scan_btn.click(
        fn=run_scan,
        inputs=[model_input],
        outputs=scan_outputs,
    )
    model_input.submit(
        fn=run_scan,
        inputs=[model_input],
        outputs=scan_outputs,
    )

if __name__ == "__main__":
    demo.launch()