Upload agents/statistical_agent.py with huggingface_hub

agents/statistical_agent.py  ADDED  +220 -0
@@ -0,0 +1,220 @@
"""
FORENSIQ – Statistical Priors Agent
Tests natural image statistics violations:
- DCT coefficient distribution (Laplacian vs Gaussian)
- Benford's law on first digits of DCT coefficients
- Gradient sparsity (kurtosis > 3 for natural images)
"""

import numpy as np
from PIL import Image
from scipy.fftpack import dct
from scipy.stats import kurtosis as scipy_kurtosis, entropy
from typing import Dict, Any

from agents.optical_agent import AgentEvidence


# ─── DCT Coefficient Distribution ───────────────────────────────────
def analyze_dct_distribution(img: Image.Image) -> Dict[str, Any]:
    """
    Natural image DCT coefficients follow a Laplacian (heavy-tailed)
    distribution. AI-generated images often follow a Gaussian.
    """
    gray = np.array(img.convert("L")).astype(np.float64)
    h, w = gray.shape
    h_crop, w_crop = (h // 8) * 8, (w // 8) * 8
    gray = gray[:h_crop, :w_crop]

    coeffs = []
    for i in range(0, h_crop, 8):
        for j in range(0, w_crop, 8):
            block = gray[i:i + 8, j:j + 8]
            dct_block = dct(dct(block.T, norm="ortho").T, norm="ortho")
            # Skip DC coefficient
            ac = dct_block.copy()
            ac[0, 0] = 0
            coeffs.extend(ac.flatten().tolist())

    coeffs = np.array(coeffs)
    coeffs = coeffs[coeffs != 0]

    if len(coeffs) < 100:
        return {"test": "DCT Distribution", "score": 0.0, "note": "Insufficient data"}

    # Excess kurtosis (fisher=True): Gaussian ≈ 0, Laplacian ≈ 3;
    # natural-image DCT coefficients are typically far heavier-tailed.
    kurt = float(scipy_kurtosis(coeffs, fisher=True))

    if kurt > 4.5:
        score = -0.4
        note = f"DCT kurtosis={kurt:.2f} (Laplacian-like, consistent with natural images)"
    elif kurt < 2.0:
        score = 0.5
        note = f"DCT kurtosis={kurt:.2f} (Gaussian-like, inconsistent with natural images)"
    elif kurt < 3.5:
        score = 0.2
        note = f"DCT kurtosis={kurt:.2f} (borderline, mildly Gaussian)"
    else:
        score = -0.1
        note = f"DCT kurtosis={kurt:.2f} (near-natural)"

    return {
        "test": "DCT Distribution",
        "kurtosis": round(kurt, 4),
        "mean": round(float(np.mean(coeffs)), 4),
        "std": round(float(np.std(coeffs)), 4),
        "score": score,
        "note": note,
    }


# ─── Benford's Law ──────────────────────────────────────────────────
def analyze_benford(img: Image.Image) -> Dict[str, Any]:
    """
    First-digit distribution of DCT coefficients should follow
    Benford's Law in natural images. AI-generated images often deviate.
    """
    gray = np.array(img.convert("L")).astype(np.float64)
    h, w = gray.shape
    h_crop, w_crop = (h // 8) * 8, (w // 8) * 8
    gray = gray[:h_crop, :w_crop]

    coeffs = []
    for i in range(0, h_crop, 8):
        for j in range(0, w_crop, 8):
            block = gray[i:i + 8, j:j + 8]
            dct_block = dct(dct(block.T, norm="ortho").T, norm="ortho")
            coeffs.extend(np.abs(dct_block.flatten()).tolist())

    coeffs = np.array(coeffs)
    nonzero = coeffs[coeffs > 0]

    if len(nonzero) < 100:
        return {"test": "Benford's Law", "score": 0.0, "note": "Insufficient data"}

    # Extract first digits
    log_vals = np.floor(np.log10(nonzero + 1e-12))
    first_digits = np.floor(nonzero / (10 ** log_vals)).astype(int)
    first_digits = first_digits[(first_digits >= 1) & (first_digits <= 9)]

    observed = np.array([np.sum(first_digits == d) for d in range(1, 10)], dtype=np.float64)
    observed = observed / (observed.sum() + 1e-9)

    # Benford's expected distribution
    benford = np.log10(1 + 1.0 / np.arange(1, 10))

    # Chi-squared statistic
    chi2 = float(np.sum((observed - benford) ** 2 / (benford + 1e-9)))

    # KL divergence
    kl_div = float(np.sum(observed * np.log((observed + 1e-9) / (benford + 1e-9))))

    if chi2 < 0.005:
        score = -0.4
        note = f"Excellent Benford's law fit (χ²={chi2:.5f}, natural image)"
    elif chi2 < 0.02:
        score = -0.1
        note = f"Good Benford's law fit (χ²={chi2:.5f})"
    elif chi2 < 0.05:
        score = 0.3
        note = f"Moderate Benford's deviation (χ²={chi2:.5f})"
    else:
        score = 0.6
        note = f"Strong Benford's law violation (χ²={chi2:.5f}, AI-like)"

    return {
        "test": "Benford's Law",
        "chi_squared": round(chi2, 6),
        "kl_divergence": round(kl_div, 6),
        "observed": observed.tolist(),
        "benford_expected": benford.tolist(),
        "score": score,
        "note": note,
    }


# ─── Gradient Sparsity ──────────────────────────────────────────────
def analyze_gradient_sparsity(img: Image.Image) -> Dict[str, Any]:
    """
    Natural images have sparse gradients (kurtosis > 3).
    AI images often have smoother gradients with lower kurtosis.
    """
    gray = np.array(img.convert("L")).astype(np.float64)

    # Compute gradients
    gx = np.diff(gray, axis=1)
    gy = np.diff(gray, axis=0)

    # Combine
    gx_flat = gx.ravel()
    gy_flat = gy.ravel()
    all_grads = np.concatenate([gx_flat, gy_flat])

    kurt_val = float(scipy_kurtosis(all_grads, fisher=True))

    # Sparsity: fraction of near-zero gradients
    threshold = np.std(all_grads) * 0.1
    sparsity = float(np.mean(np.abs(all_grads) < threshold))

    if kurt_val > 5.0 and sparsity > 0.4:
        score = -0.4
        note = f"Sparse gradients (kurtosis={kurt_val:.2f}, sparsity={sparsity:.2f}, natural)"
    elif kurt_val < 2.0:
        score = 0.5
        note = f"Low gradient kurtosis ({kurt_val:.2f}), unnaturally smooth"
    elif kurt_val < 3.5:
        score = 0.2
        note = f"Borderline gradient statistics (kurtosis={kurt_val:.2f})"
    else:
        score = -0.1
        note = f"Normal gradient statistics (kurtosis={kurt_val:.2f})"

    return {
        "test": "Gradient Sparsity",
        "kurtosis": round(kurt_val, 4),
        "sparsity": round(sparsity, 4),
        "gradient_mean": round(float(np.mean(np.abs(all_grads))), 4),
        "score": score,
        "note": note,
    }


# ─── Main Agent Entry Point ─────────────────────────────────────────
def run_statistical_agent(img: Image.Image) -> AgentEvidence:
    """Run all statistical priors tests."""
    findings = []
    scores = []

    for fn in [analyze_dct_distribution, analyze_benford, analyze_gradient_sparsity]:
        try:
            result = fn(img)
            findings.append(result)
            scores.append(result["score"])
        except Exception as e:
            findings.append({"test": fn.__name__, "error": str(e), "score": 0})

    avg_score = float(np.mean(scores)) if scores else 0.0
    confidence = min(1.0, 0.5 + 0.5 * abs(avg_score))

    violations = [f["test"] for f in findings if f.get("score", 0) > 0.2]
    compliant = [f["test"] for f in findings if f.get("score", 0) < -0.1]

    if violations:
        rationale = f"Statistical violations: {', '.join(violations)}."
    elif compliant:
        rationale = f"Natural statistics confirmed: {', '.join(compliant)}."
    else:
        rationale = "Statistical analysis inconclusive."

    for f in findings:
        if f.get("note"):
            rationale += f" [{f['test']}]: {f['note']}."

    return AgentEvidence(
        agent_name="Statistical Priors Agent",
        violation_score=np.clip(avg_score, -1, 1),
        confidence=confidence,
        failure_prob=max(0.0, 1.0 - len(scores) / 3),
        rationale=rationale,
        sub_findings=findings,
    )
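A minimal usage sketch for the new agent (not part of this commit, and hedged accordingly): it assumes the repository root is on PYTHONPATH so that `agents.statistical_agent` and `agents.optical_agent` import cleanly, that `AgentEvidence` exposes the fields passed above as attributes, and that `sample.jpg` is only a placeholder input path.

# Hypothetical caller-side check; file path and field access are assumptions,
# based on how AgentEvidence is constructed in run_statistical_agent above.
from PIL import Image

from agents.statistical_agent import run_statistical_agent

img = Image.open("sample.jpg")          # placeholder path, replace with a real image
evidence = run_statistical_agent(img)

print(evidence.agent_name)
print(f"violation_score: {evidence.violation_score:+.3f}")  # negative = natural, positive = AI-like
print(f"confidence:      {evidence.confidence:.3f}")
print(evidence.rationale)
for finding in evidence.sub_findings:
    # each finding always carries "test"; "note" or "error" depending on outcome
    print(" -", finding["test"], finding.get("note") or finding.get("error"))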