Spaces:

Scam-AI
/

ai-image-detector

Running

File size: 4,500 Bytes

"""AI Image Detector (Open Baseline) — Scam.AI

Wraps the open-source Organika/sdxl-detector model in a Gradio interface.
The output explicitly positions this as a community baseline, with a CTA
toward Scam.AI's production-grade detection systems.
"""

import gradio as gr
from transformers import pipeline

# Community baseline checkpoint served by this Space
# (192K downloads, Swin-based, ~110MB).
BASELINE_MODEL = "Organika/sdxl-detector"

# Build the classifier once at import time so every call to detect() reuses
# the same pipeline object; the two prints bracket the load in the startup log.
print(f"Loading baseline model: {BASELINE_MODEL}")
clf = pipeline(task="image-classification", model=BASELINE_MODEL)
print("Model loaded.")


# Label fragments that identify the "AI-generated" class wherever they occur
# inside a label (e.g. "deepfake", "ai_generated").
_AI_SUBSTRINGS = ("artificial", "fake", "generated")
# Lower-cased labels that identify the "real photograph" class.
_REAL_LABELS = ("human", "real")


def _is_ai_label(label):
    """Return True when *label* names the AI-generated class.

    The long keywords are matched as substrings, but the two-letter "ai"
    must be a whole word: a plain substring test would also accept
    unrelated labels such as "painting" or "plain".
    """
    lowered = label.lower()
    if any(fragment in lowered for fragment in _AI_SUBSTRINGS):
        return True
    # Split on anything that is not a letter or digit ("ai-generated" -> ai, generated).
    words = "".join(ch if ch.isalnum() else " " for ch in lowered).split()
    return "ai" in words


def _ai_probability(scores):
    """Derive the AI-generated probability from a non-empty label -> score map."""
    ai_scores = [score for label, score in scores.items() if _is_ai_label(label)]
    if ai_scores:
        # Highest-scoring AI label; for a pipeline result sorted by score this
        # is the same entry the first-match lookup would pick.
        return max(ai_scores)

    real_scores = [
        score for label, score in scores.items() if label.lower() in _REAL_LABELS
    ]
    if real_scores:
        # No explicit AI label: take the complement of the "real" class.
        return 1 - max(real_scores)

    # Unknown naming scheme: assume the top-scoring label is the AI class.
    return max(scores.values())


def detect(img):
    """Classify *img* with the baseline model and build the UI outputs.

    Args:
        img: The image to analyze, or None when nothing is uploaded. It is
            passed unchanged to the module-level ``clf`` pipeline.

    Returns:
        A ``(scores, markdown)`` tuple. ``scores`` maps every label returned
        by the classifier to its float score, or is None when there is
        nothing to display. ``markdown`` holds the verdict, a short
        explanation and the "About this baseline" call-to-action.
    """
    if img is None:
        return None, "Upload an image to analyze."

    results = clf(img)
    # results: list of dicts {label, score}
    scores = {r["label"]: float(r["score"]) for r in results}
    if not scores:
        # Nothing to interpret; avoid indexing / max() on an empty mapping.
        return None, "The model returned no predictions for this image."

    ai_prob = _ai_probability(scores)

    if ai_prob > 0.85:
        verdict = "🤖 **Likely AI-generated**"
        explanation = (
            f"This open-source baseline is **{ai_prob*100:.0f}% confident** "
            "the image is AI-generated."
        )
    elif ai_prob > 0.5:
        verdict = "⚠️ **Possibly AI-generated**"
        explanation = (
            f"The baseline leans toward AI ({ai_prob*100:.0f}%) but with low "
            "confidence. In our experience this is exactly the regime where "
            "open models fail — they catch the obvious cases but miss the "
            "sophisticated ones."
        )
    else:
        verdict = "📷 **Likely real photograph**"
        explanation = (
            f"Baseline confidence in 'real': {(1-ai_prob)*100:.0f}%. "
            "Note: open baselines have well-known false-negative blind spots "
            "on newer generators (GPT-Image-2, FLUX, etc)."
        )

    # Static footer appended to every verdict: model attribution, known
    # limitations of the baseline, and the pointer to the commercial offering.
    cta = (
        "\n\n---\n\n"
        "### 📌 About this baseline\n\n"
        f"Model: [`{BASELINE_MODEL}`](https://huggingface.co/{BASELINE_MODEL}) "
        "— a community Swin-Transformer trained for SDXL detection. "
        "Useful as a sanity check but does **not** generalize well to:\n"
        "- Newer generators (GPT-Image-2, FLUX.1, Imagen 3)\n"
        "- Heavily post-processed images (super-resolution, JPEG re-compression)\n"
        "- Domain shift (medical, document, surveillance)\n\n"
        "Our team published [a comprehensive benchmark](https://huggingface.co/datasets/Scam-AI/gpt-image-2) "
        "showing 30+ AUC-point gaps between in-distribution academic tests "
        "and real-world performance.\n\n"
        "**For production deployment** — APIs, on-premise inference, custom "
        "domain fine-tuning — talk to us at **[scam.ai](https://www.scam.ai)**."
    )

    return scores, verdict + "\n\n" + explanation + cta


# Declarative UI definition. Components are created in the order they appear
# inside the nested context managers, so statement order here is the layout.
with gr.Blocks(title="AI Image Detector — Scam.AI") as demo:
    # Page header: title, one-line usage hint, attribution with model link.
    gr.Markdown(
        "# 🛡️ AI Image Detector\n"
        "*Drop an image to check whether an open-source baseline thinks it's "
        "AI-generated.*\n\n"
        "*Built by [Scam.AI](https://www.scam.ai) · Powered by "
        f"[`{BASELINE_MODEL}`](https://huggingface.co/{BASELINE_MODEL})*"
    )

    with gr.Row():
        # First column: image input, trigger button and the examples widget.
        with gr.Column():
            # type="pil" — detect() receives the upload as a PIL image.
            inp = gr.Image(type="pil", label="Upload an image")
            btn = gr.Button("Analyze", variant="primary", size="lg")
            # No example images are configured (empty list).
            gr.Examples(
                examples=[],
                inputs=inp,
            )
        # Second column: the two outputs returned by detect().
        with gr.Column():
            scores = gr.Label(label="Class scores")
            verdict = gr.Markdown()

    # detect() is wired to both the button click and the image-change event,
    # with identical inputs and outputs.
    btn.click(detect, inputs=inp, outputs=[scores, verdict])
    inp.change(detect, inputs=inp, outputs=[scores, verdict])

    # Page footer describing the scope of this demo.
    gr.Markdown(
        "---\n"
        "**About:** This Space uses a community-trained open baseline to "
        "demonstrate the AI-image-detection task. Real production systems "
        "(higher accuracy, calibrated probabilities, robust to adversarial "
        "post-processing) are not open. Visit [scam.ai](https://www.scam.ai) "
        "if you need detection for actual deployment."
    )


if __name__ == "__main__":
    # Script entry point: start the app with the blue "Soft" theme and
    # server-side rendering turned off.
    soft_blue_theme = gr.themes.Soft(primary_hue="blue")
    demo.launch(theme=soft_blue_theme, ssr_mode=False)