anky2002 committed on
Commit
2f3f5e8
·
verified ·
1 Parent(s): ceda018

Upload agents/statistical_agent.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. agents/statistical_agent.py +220 -0
agents/statistical_agent.py ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ FORENSIQ — Statistical Priors Agent
3
+ Tests natural image statistics violations:
4
+ - DCT coefficient distribution (Laplacian vs Gaussian)
5
+ - Benford's law on first digits of DCT coefficients
6
+ - Gradient sparsity (kurtosis > 3 for natural images)
7
+ """
8
+
9
+ import numpy as np
10
+ from PIL import Image
11
+ from scipy.fftpack import dct
12
+ from scipy.stats import kurtosis as scipy_kurtosis, entropy
13
+ from typing import Dict, Any
14
+
15
+ from agents.optical_agent import AgentEvidence
16
+
17
+
18
+ # ─── DCT Coefficient Distribution ───────────────────────────────────
19
def analyze_dct_distribution(img: Image.Image) -> Dict[str, Any]:
    """
    Score how heavy-tailed the 8x8 block-DCT AC coefficients of ``img`` are.

    Natural image DCT coefficients follow a Laplacian (heavy-tailed)
    distribution. AI-generated images often follow a Gaussian.

    Args:
        img: Image to analyse; it is converted with ``convert("L")`` and
            cropped to a whole number of 8x8 blocks.

    Returns:
        A dict that always has ``test``, ``score`` and ``note``. When at
        least 100 non-zero AC coefficients are available it also carries
        ``kurtosis`` (excess kurtosis), ``mean`` and ``std`` of those
        coefficients. Scores above zero flag Gaussian-like statistics,
        scores below zero flag Laplacian-like statistics, and 0.0 means
        the test could not be evaluated.
    """
    gray = np.array(img.convert("L")).astype(np.float64)
    h, w = gray.shape
    h_crop, w_crop = (h // 8) * 8, (w // 8) * 8

    # An image smaller than one block yields no coefficients at all.
    if h_crop == 0 or w_crop == 0:
        return {"test": "DCT Distribution", "score": 0.0, "note": "Insufficient data"}

    # View the cropped image as a (block_rows, block_cols, 8, 8) stack and
    # transform every block in one call instead of looping in Python.
    blocks = (
        gray[:h_crop, :w_crop]
        .reshape(h_crop // 8, 8, w_crop // 8, 8)
        .swapaxes(1, 2)
    )
    spectrum = dct(dct(blocks, axis=2, norm="ortho"), axis=3, norm="ortho")

    # Skip DC coefficient: zero it, then drop every exact zero below.
    spectrum[..., 0, 0] = 0
    coeffs = spectrum.ravel()
    coeffs = coeffs[coeffs != 0]

    if len(coeffs) < 100:
        return {"test": "DCT Distribution", "score": 0.0, "note": "Insufficient data"}

    # fisher=True gives *excess* kurtosis: Gaussian ≈ 0, Laplacian ≈ 3.
    # NOTE(review): the thresholds below were originally annotated with the
    # Pearson values (Laplacian ≈ 6, Gaussian ≈ 3); confirm which scale the
    # cut-offs were tuned on before changing them.
    kurt = float(scipy_kurtosis(coeffs, fisher=True))

    # Zero-variance input makes the kurtosis undefined (NaN); report that as
    # neutral instead of letting it fall through to the "near-natural" branch.
    if not np.isfinite(kurt):
        return {"test": "DCT Distribution", "score": 0.0, "note": "Insufficient data"}

    if kurt > 4.5:
        score = -0.4
        note = f"DCT kurtosis={kurt:.2f} (Laplacian-like, consistent with natural images)"
    elif kurt < 2.0:
        score = 0.5
        note = f"DCT kurtosis={kurt:.2f} (Gaussian-like, inconsistent with natural images)"
    elif kurt < 3.5:
        score = 0.2
        note = f"DCT kurtosis={kurt:.2f} (borderline, mildly Gaussian)"
    else:
        score = -0.1
        note = f"DCT kurtosis={kurt:.2f} (near-natural)"

    return {
        "test": "DCT Distribution",
        "kurtosis": round(kurt, 4),
        "mean": round(float(np.mean(coeffs)), 4),
        "std": round(float(np.std(coeffs)), 4),
        "score": score,
        "note": note,
    }
69
+
70
+
71
+ # ─── Benford's Law ──────────────────────────────────────────────────
72
def analyze_benford(img: Image.Image) -> Dict[str, Any]:
    """
    Compare first digits of 8x8 block-DCT magnitudes with Benford's Law.

    First-digit distribution of DCT coefficients should follow
    Benford's Law in natural images. AI images deviate.

    Args:
        img: Image to analyse; it is converted with ``convert("L")`` and
            cropped to a whole number of 8x8 blocks.

    Returns:
        A dict that always has ``test``, ``score`` and ``note``. When
        enough coefficients are available it also carries
        ``chi_squared``, ``kl_divergence``, ``observed`` (digit
        frequencies for 1..9) and ``benford_expected``. Higher scores
        mean a larger deviation from Benford's Law; 0.0 means the test
        could not be evaluated.
    """
    gray = np.array(img.convert("L")).astype(np.float64)
    h, w = gray.shape
    h_crop, w_crop = (h // 8) * 8, (w // 8) * 8

    # An image smaller than one block yields no coefficients at all.
    if h_crop == 0 or w_crop == 0:
        return {"test": "Benford's Law", "score": 0.0, "note": "Insufficient data"}

    # Transform all blocks at once: (block_rows, block_cols, 8, 8).
    blocks = (
        gray[:h_crop, :w_crop]
        .reshape(h_crop // 8, 8, w_crop // 8, 8)
        .swapaxes(1, 2)
    )
    magnitudes = np.abs(
        dct(dct(blocks, axis=2, norm="ortho"), axis=3, norm="ortho")
    ).ravel()

    # Flat blocks leave floating-point residue (roughly 1e-13) in place of
    # exact zeros. The original "+ 1e-12" inside log10 filtered most of it
    # only by accident; use an explicit magnitude floor instead so residue
    # digits never enter the histogram.
    nonzero = magnitudes[magnitudes > 1e-9]

    if len(nonzero) < 100:
        return {"test": "Benford's Law", "score": 0.0, "note": "Insufficient data"}

    # Extract first digits: scale each value into [1, 10) by its decade.
    exponents = np.floor(np.log10(nonzero))
    first_digits = np.floor(nonzero / (10.0 ** exponents)).astype(int)
    # Rounding right at a decade boundary can give 0 or 10; drop those.
    first_digits = first_digits[(first_digits >= 1) & (first_digits <= 9)]

    counts = np.bincount(first_digits, minlength=10)[1:10].astype(np.float64)
    total = counts.sum()
    if total == 0:
        return {"test": "Benford's Law", "score": 0.0, "note": "Insufficient data"}
    observed = counts / total

    # Benford's expected distribution
    benford = np.log10(1 + 1.0 / np.arange(1, 10))

    # Chi-squared style distance computed on proportions (not on counts),
    # so it does not grow with the number of coefficients.
    chi2 = float(np.sum((observed - benford) ** 2 / (benford + 1e-9)))

    # KL divergence
    kl_div = float(np.sum(observed * np.log((observed + 1e-9) / (benford + 1e-9))))

    if chi2 < 0.005:
        score = -0.4
        note = f"Excellent Benford's law fit (χ²={chi2:.5f}, natural image)"
    elif chi2 < 0.02:
        score = -0.1
        note = f"Good Benford's law fit (χ²={chi2:.5f})"
    elif chi2 < 0.05:
        score = 0.3
        note = f"Moderate Benford's deviation (χ²={chi2:.5f})"
    else:
        score = 0.6
        note = f"Strong Benford's law violation (χ²={chi2:.5f}, AI-like)"

    return {
        "test": "Benford's Law",
        "chi_squared": round(chi2, 6),
        "kl_divergence": round(kl_div, 6),
        "observed": observed.tolist(),
        "benford_expected": benford.tolist(),
        "score": score,
        "note": note,
    }
134
+
135
+
136
+ # ─── Gradient Sparsity ──────────────────────────────────────────────
137
def analyze_gradient_sparsity(img: Image.Image) -> Dict[str, Any]:
    """
    Score how sparse (peaked around zero) the pixel gradients of ``img`` are.

    Natural images have sparse, heavy-tailed gradients; AI images often
    have smoother gradients with lower kurtosis. The kurtosis used here is
    the *excess* kurtosis (``fisher=True``), for which a Gaussian scores 0.

    Args:
        img: Image to analyse; it is converted with ``convert("L")``.

    Returns:
        A dict that always has ``test``, ``score`` and ``note``. When the
        gradients have non-zero spread it also carries ``kurtosis``,
        ``sparsity`` (fraction of gradients smaller in magnitude than 10%
        of the gradient standard deviation) and ``gradient_mean`` (mean
        absolute gradient). Positive scores flag low-kurtosis gradients;
        0.0 means the test could not be evaluated.
    """
    gray = np.array(img.convert("L")).astype(np.float64)

    # Horizontal and vertical first differences, pooled into one sample.
    all_grads = np.concatenate(
        [np.diff(gray, axis=1).ravel(), np.diff(gray, axis=0).ravel()]
    )

    # A single-pixel image has no neighbouring pixels to difference.
    if all_grads.size == 0:
        return {"test": "Gradient Sparsity", "score": 0.0, "note": "Insufficient data"}

    # A perfectly flat image has zero gradient variance, which makes the
    # kurtosis undefined (NaN) and the sparsity threshold zero. Report that
    # as neutral rather than as "normal" statistics with kurtosis=nan.
    spread = float(np.std(all_grads))
    if spread == 0.0:
        return {
            "test": "Gradient Sparsity",
            "score": 0.0,
            "note": "Insufficient data (no gradient variation)",
        }

    kurt_val = float(scipy_kurtosis(all_grads, fisher=True))

    # Sparsity: fraction of near-zero gradients
    threshold = spread * 0.1
    sparsity = float(np.mean(np.abs(all_grads) < threshold))

    if kurt_val > 5.0 and sparsity > 0.4:
        score = -0.4
        note = f"Sparse gradients (kurtosis={kurt_val:.2f}, sparsity={sparsity:.2f}, natural)"
    elif kurt_val < 2.0:
        score = 0.5
        note = f"Low gradient kurtosis ({kurt_val:.2f}), unnaturally smooth"
    elif kurt_val < 3.5:
        score = 0.2
        note = f"Borderline gradient statistics (kurtosis={kurt_val:.2f})"
    else:
        score = -0.1
        note = f"Normal gradient statistics (kurtosis={kurt_val:.2f})"

    return {
        "test": "Gradient Sparsity",
        "kurtosis": round(kurt_val, 4),
        "sparsity": round(sparsity, 4),
        "gradient_mean": round(float(np.mean(np.abs(all_grads))), 4),
        "score": score,
        "note": note,
    }
180
+
181
+
182
+ # ─── Main Agent Entry Point ─────────────────────────────────────────
183
def run_statistical_agent(img: Image.Image) -> AgentEvidence:
    """
    Run all statistical priors tests and fuse them into one evidence record.

    Each sub-test runs independently; a sub-test that raises is recorded
    as an error finding and does not stop the others.

    Args:
        img: Image handed unchanged to every sub-test.

    Returns:
        An ``AgentEvidence`` built with:
        ``violation_score`` - mean of the sub-test scores, clipped to
        [-1, 1] (0.0 if every sub-test failed);
        ``confidence`` - ``0.5 + 0.5 * |mean score|``, capped at 1.0;
        ``failure_prob`` - fraction of sub-tests that did not produce a
        score;
        ``rationale`` - a summary sentence followed by each sub-test's
        note or error;
        ``sub_findings`` - the per-test result dicts.
    """
    tests = (analyze_dct_distribution, analyze_benford, analyze_gradient_sparsity)
    findings = []
    scores = []

    for fn in tests:
        try:
            result = fn(img)
            score = result["score"]
        except Exception as e:
            # Deliberate best-effort boundary: one broken test must not
            # take the whole agent down. The error is kept in the findings
            # and surfaced in the rationale below.
            findings.append({"test": fn.__name__, "error": str(e), "score": 0})
        else:
            # Record only after the score was read, so a malformed result
            # cannot produce two findings for the same test.
            findings.append(result)
            scores.append(score)

    avg_score = float(np.mean(scores)) if scores else 0.0
    confidence = min(1.0, 0.5 + 0.5 * abs(avg_score))

    # Strict comparisons: the borderline scores 0.2 and -0.1 count as neither.
    violations = [f["test"] for f in findings if f.get("score", 0) > 0.2]
    compliant = [f["test"] for f in findings if f.get("score", 0) < -0.1]

    if violations:
        rationale = f"Statistical violations: {', '.join(violations)}."
    elif compliant:
        rationale = f"Natural statistics confirmed: {', '.join(compliant)}."
    else:
        rationale = "Statistical analysis inconclusive."

    for f in findings:
        if f.get("note"):
            rationale += f" [{f['test']}]: {f['note']}."
        elif f.get("error"):
            rationale += f" [{f['test']}]: failed ({f['error']})."

    return AgentEvidence(
        agent_name="Statistical Priors Agent",
        # Plain Python float rather than a NumPy scalar, so the record
        # serialises without NumPy-aware encoders.
        violation_score=float(np.clip(avg_score, -1, 1)),
        confidence=confidence,
        failure_prob=max(0.0, 1.0 - len(scores) / len(tests)),
        rationale=rationale,
        sub_findings=findings,
    )