"""FORENSIQ — Statistical Priors Agent (22 features)"""
import numpy as np
from PIL import Image
from scipy.fftpack import dct
from scipy.stats import kurtosis as sp_kurt, skew as sp_skew
from scipy.ndimage import gaussian_filter, sobel
from agents.optical_agent import AgentEvidence
def _g(img): return np.array(img.convert("L")).astype(np.float64)
def _rgb(img): return np.array(img.convert("RGB")).astype(np.float64)
def t01_dct_kurtosis(img):
    gray=_g(img); h,w=gray.shape; hc,wc=(h//8)*8,(w//8)*8; gray=gray[:hc,:wc]
    coeffs=[]
    for i in range(0,hc,8):
        for j in range(0,wc,8):
            b=gray[i:i+8,j:j+8]; d=dct(dct(b.T,norm="ortho").T,norm="ortho"); ac=d.copy(); ac[0,0]=0
            coeffs.extend(ac.ravel().tolist())
    c=np.array(coeffs); c=c[c!=0]
    if len(c)<100: return {"test":"DCT Kurtosis","score":0.0,"note":"Insufficient data"}
    k=float(sp_kurt(c,fisher=True))
    # Natural photos: kurtosis 5-350. AI over-sharpening: >400.
    # Too-low kurtosis (<2) = Gaussian = old GAN artifacts.
    # Too-high kurtosis (>400) = pathological sharpness = modern AI.
    if k>400: s,n=0.3,f"Pathologically high DCT kurtosis (κ={k:.0f}) — AI over-sharpening"
    elif k>4.5: s,n=-0.4,f"Laplacian DCT (κ={k:.2f})"
    elif k<2.0: s,n=0.5,f"Gaussian DCT (κ={k:.2f})"
    elif k<3.5: s,n=0.2,f"Borderline (κ={k:.2f})"
    else: s,n=-0.1,f"Near-natural (κ={k:.2f})"
    return {"test":"DCT Kurtosis","kurtosis":round(k,4),"score":s,"note":n}
def t02_benford(img):
    gray=_g(img); h,w=gray.shape; hc,wc=(h//8)*8,(w//8)*8; gray=gray[:hc,:wc]
    coeffs=[]
    for i in range(0,hc,8):
        for j in range(0,wc,8):
            coeffs.extend(np.abs(dct(dct(gray[i:i+8,j:j+8].T,norm="ortho").T,norm="ortho").ravel()).tolist())
    c=np.array(coeffs); nz=c[c>0]
    if len(nz)<100: return {"test":"Benford's Law","score":0.0,"note":"Insufficient"}
    lv=np.floor(np.log10(nz+1e-12)); fd=np.floor(nz/(10**lv)).astype(int); fd=fd[(fd>=1)&(fd<=9)]
    obs=np.array([np.sum(fd==d) for d in range(1,10)],dtype=float); obs/=(obs.sum()+1e-9)
    ben=np.log10(1+1.0/np.arange(1,10))
    # Chi-square-like statistic on relative frequencies rather than raw counts,
    # so the thresholds below are calibrated to proportions, not classical χ² tables.
    chi2=float(np.sum((obs-ben)**2/(ben+1e-9)))
    if chi2<0.005: s,n=-0.4,f"Excellent Benford fit (χ²={chi2:.5f})"
    elif chi2<0.02: s,n=-0.1,f"Good fit (χ²={chi2:.5f})"
    elif chi2<0.05: s,n=0.3,f"Moderate deviation (χ²={chi2:.5f})"
    else: s,n=0.6,f"Strong violation (χ²={chi2:.5f})"
    return {"test":"Benford's Law","chi2":round(chi2,6),"observed":obs.tolist(),"benford_expected":ben.tolist(),"score":s,"note":n}
def t03_gradient_sparsity(img):
    gray=_g(img); gx=np.diff(gray,axis=1).ravel(); gy=np.diff(gray,axis=0).ravel()
    ag=np.concatenate([gx,gy]); k=float(sp_kurt(ag,fisher=True))
    thr=np.std(ag)*0.1; sp=float(np.mean(np.abs(ag)<thr))
    if k>5 and sp>0.4: s,n=-0.4,f"Sparse gradients (κ={k:.2f}, sp={sp:.2f})"
    elif k<2: s,n=0.5,f"Low kurtosis ({k:.2f})"
    elif k<3.5: s,n=0.2,f"Borderline ({k:.2f})"
    else: s,n=-0.1,f"Normal (κ={k:.2f})"
    return {"test":"Gradient Sparsity","kurtosis":round(k,4),"sparsity":round(sp,4),"score":s,"note":n}
def t04_local_kurtosis(img):
    gray=_g(img); h,w=gray.shape; bs=32; hc,wc=(h//bs)*bs,(w//bs)*bs; gray=gray[:hc,:wc]
    lk=[]
    for i in range(0,hc,bs):
        for j in range(0,wc,bs):
            b=gray[i:i+bs,j:j+bs].ravel()
            if np.std(b)>1: lk.append(float(sp_kurt(b,fisher=True)))
    if len(lk)<10: return {"test":"Local Kurtosis Map","score":0.0,"note":"Insufficient"}
    std=float(np.std(lk))
    if std>3: s,n=-0.3,f"High kurtosis variation (σ={std:.2f})"
    elif std<1: s,n=0.4,f"Uniform statistics (σ={std:.2f})"
    else: s,n=0.0,f"Moderate (σ={std:.2f})"
    return {"test":"Local Kurtosis Map","kurtosis_std":round(std,4),"score":s,"note":n}
def t05_color_histogram(img):
    rgb=np.array(img.convert("RGB")); scores=[]
    for c in range(3):
        h,_=np.histogram(rgb[:,:,c].ravel(),bins=256,range=(0,256))
        sm=gaussian_filter(h.astype(float),2)
        scores.append(float(np.mean(np.abs(h-sm))/(np.mean(h)+1e-9)))
    avg=float(np.mean(scores))
    if avg<0.3: s,n=-0.2,f"Smooth histograms ({avg:.3f})"
    elif avg>0.8: s,n=0.4,f"Irregular histograms ({avg:.3f})"
    else: s,n=0.0,f"Histogram smoothness={avg:.3f}"
    return {"test":"Color Histogram","smoothness":round(avg,4),"score":s,"note":n}
def t06_wavelet_kurtosis(img):
    gray=_g(img); h,w=gray.shape; h2,w2=h//2*2,w//2*2; gray=gray[:h2,:w2]
    lh=(gray[0::2,0::2]+gray[0::2,1::2]-gray[1::2,0::2]-gray[1::2,1::2])/4
    hl=(gray[0::2,0::2]-gray[0::2,1::2]+gray[1::2,0::2]-gray[1::2,1::2])/4
    hh=(gray[0::2,0::2]-gray[0::2,1::2]-gray[1::2,0::2]+gray[1::2,1::2])/4
    hf=np.concatenate([lh.ravel(),hl.ravel(),hh.ravel()]); hf=hf[hf!=0]
    if len(hf)<100: return {"test":"Wavelet Kurtosis","score":0.0,"note":"Insufficient"}
    k=float(sp_kurt(hf,fisher=True))
    # Same ceiling logic as t01: AI over-sharpening produces kurtosis > 60
    if k>60: s,n=0.2,f"Pathologically high wavelet kurtosis (κ={k:.1f}) — AI over-sharpening"
    elif k>5: s,n=-0.3,f"Heavy-tailed wavelets (κ={k:.2f})"
    elif k<1.5: s,n=0.4,f"Gaussian wavelets (κ={k:.2f})"
    else: s,n=0.0,f"Wavelet κ={k:.2f}"
    return {"test":"Wavelet Kurtosis","kurtosis":round(k,4),"score":s,"note":n}
def t07_entropy_map(img):
    gray=_g(img); h,w=gray.shape; bs=32; ents=[]
    for i in range(0,h-bs,bs):
        for j in range(0,w-bs,bs):
            b=gray[i:i+bs,j:j+bs].ravel().astype(int)
            h_,_=np.histogram(b,bins=64,range=(0,256)); h_=h_.astype(float); h_/=(h_.sum()+1e-9)
            ents.append(-float(np.sum(h_*np.log2(h_+1e-12))))
    if len(ents)<4: return {"test":"Entropy Map","score":0.0,"note":"Too small"}
    std=float(np.std(ents)); mn=float(np.mean(ents))
    if std>0.5: s,n=-0.2,f"Varied local entropy (σ={std:.2f})"
    elif std<0.15: s,n=0.3,f"Uniform entropy (σ={std:.2f})"
    else: s,n=0.0,f"Entropy σ={std:.2f}"
    return {"test":"Entropy Map","entropy_std":round(std,4),"mean":round(mn,4),"score":s,"note":n}
def t08_edge_orientation(img):
    gray=_g(img); gx=sobel(gray,1); gy=sobel(gray,0); mag=np.hypot(gx,gy)
    strong=mag>np.percentile(mag,80); angles=np.arctan2(gy[strong],gx[strong])
    hist,_=np.histogram(angles,bins=36,range=(-np.pi,np.pi)); hist=hist.astype(float); hist/=(hist.sum()+1e-9)
    ent=-float(np.sum(hist*np.log(hist+1e-9)))
    max_ent=np.log(36)  # entropy of a perfectly flat 36-bin orientation histogram
    norm_ent=ent/max_ent
    if norm_ent<0.85: s,n=-0.2,f"Directional edges (entropy={norm_ent:.3f})"
    elif norm_ent>0.95: s,n=0.2,f"Isotropic edges ({norm_ent:.3f})"
    else: s,n=0.0,f"Edge entropy={norm_ent:.3f}"
    return {"test":"Edge Orientation","entropy":round(norm_ent,4),"score":s,"note":n}
def t09_lbp_distribution(img):
    gray=np.array(img.convert("L")); h,w=gray.shape
    # Simplified LBP: neighbors visited in circular order, so adjacent bits are adjacent pixels
    lbp=np.zeros((h-2,w-2),dtype=int)
    for dy,dx,bit in [(-1,-1,0),(-1,0,1),(-1,1,2),(0,1,3),(1,1,4),(1,0,5),(1,-1,6),(0,-1,7)]:
        lbp|=((gray[1+dy:h-1+dy,1+dx:w-1+dx]>=gray[1:h-1,1:w-1]).astype(int)<<bit)
    hist,_=np.histogram(lbp.ravel(),bins=256,range=(0,256)); hist=hist.astype(float); hist/=(hist.sum()+1e-9)
    # Uniform LBP patterns (≤2 transitions) dominate in natural images
    uniform=0
    for v in range(256):
        b=format(v,'08b'); t=sum(1 for i in range(7) if b[i]!=b[i+1])+int(b[0]!=b[7])
        if t<=2: uniform+=hist[v]
    if uniform>0.6: s,n=-0.2,f"Natural LBP (uniform={uniform:.2%})"
    elif uniform<0.3: s,n=0.3,f"Non-uniform LBP ({uniform:.2%})"
    else: s,n=0.0,f"LBP uniform={uniform:.2%}"
    return {"test":"LBP Distribution","uniform_ratio":round(uniform,4),"score":s,"note":n}
def t10_cooccurrence(img):
    gray=(np.array(img.convert("L"))//16).astype(int); h,w=gray.shape
    # Vectorized GLCM — horizontal adjacency
    glcm=np.zeros((16,16))
    np.add.at(glcm, (gray[:,:-1].ravel(), gray[:,1:].ravel()), 1)
    glcm/=(glcm.sum()+1e-9)
    energy=float(np.sum(glcm**2))
    I,J=np.mgrid[0:16,0:16]; homog=float(np.sum(glcm/(1+np.abs(I-J))))
    if energy<0.05 and homog>0.5: s,n=-0.2,f"Natural texture (E={energy:.4f}, H={homog:.3f})"
    elif energy>0.2: s,n=0.3,f"Flat/repetitive (E={energy:.4f})"
    else: s,n=0.0,f"GLCM E={energy:.4f}, H={homog:.3f}"
    return {"test":"Co-occurrence Matrix","energy":round(energy,4),"homogeneity":round(homog,4),"score":s,"note":n}
def t11_block_variance(img):
    gray=_g(img); h,w=gray.shape; bs=8; hc,wc=(h//bs)*bs,(w//bs)*bs
    gray=gray[:hc,:wc]; bvars=[]
    for i in range(0,hc,bs):
        for j in range(0,wc,bs):
            bvars.append(float(np.var(gray[i:i+bs,j:j+bs])))
    bv=np.array(bvars)
    # ANOVA-flavored dispersion check: coefficient of variation (std/mean)
    # of the per-block variances, reported as "VoV"
    vov=float(np.std(bv))/(float(np.mean(bv))+1e-9)
    if vov>1: s,n=-0.2,f"Varied block variance (VoV={vov:.3f})"
    elif vov<0.3: s,n=0.3,f"Uniform block variance ({vov:.3f})"
    else: s,n=0.0,f"VoV={vov:.3f}"
    return {"test":"Block Variance ANOVA","vov":round(vov,4),"score":s,"note":n}
def t12_gradient_magnitude(img):
    gray=_g(img); gm=np.hypot(sobel(gray,0),sobel(gray,1))
    k=float(sp_kurt(gm.ravel(),fisher=True)); sk=float(sp_skew(gm.ravel()))
    if k>5: s,n=-0.2,f"Heavy-tailed gradients (κ={k:.2f})"
    elif k<2: s,n=0.3,f"Light-tailed ({k:.2f})"
    else: s,n=0.0,f"Gradient κ={k:.2f}"
    return {"test":"Gradient Magnitude Dist","kurtosis":round(k,3),"skewness":round(sk,3),"score":s,"note":n}
def t13_spatial_correlation(img):
    gray=_g(img); h,w=gray.shape; step=max(1,h*w//200000)
    # Lag-1 and lag-5 horizontal autocorrelation on a subsampled pixel grid
    ac1=float(np.corrcoef(gray[:,:-1].ravel()[::step],gray[:,1:].ravel()[::step])[0,1])
    ac5=float(np.corrcoef(gray[:,:-5].ravel()[::step],gray[:,5:].ravel()[::step])[0,1])
    decay=ac1-ac5
    if 0.05<decay<0.3: s,n=-0.2,f"Natural correlation decay ({decay:.3f})"
    elif decay<0.01: s,n=0.3,f"Flat correlation ({decay:.3f})"
    else: s,n=0.0,f"Decay={decay:.3f}"
    return {"test":"Spatial Correlation Decay","decay":round(decay,4),"score":s,"note":n}
def t14_dct_skewness(img):
    gray=_g(img); h,w=gray.shape; hc,wc=(h//8)*8,(w//8)*8; gray=gray[:hc,:wc]
    coeffs=[]
    for i in range(0,hc,8):
        for j in range(0,wc,8):
            d=dct(dct(gray[i:i+8,j:j+8].T,norm="ortho").T,norm="ortho"); ac=d.copy(); ac[0,0]=0
            coeffs.extend(ac.ravel().tolist())
    c=np.array(coeffs); c=c[c!=0]
    if len(c)<100: return {"test":"DCT Skewness","score":0.0,"note":"Insufficient"}
    sk=float(sp_skew(c))
    if abs(sk)<0.1: s,n=-0.2,f"Symmetric DCT (skew={sk:.3f})"
    elif abs(sk)>0.5: s,n=0.3,f"Skewed DCT ({sk:.3f})"
    else: s,n=0.0,f"DCT skew={sk:.3f}"
    return {"test":"DCT Skewness","skewness":round(sk,4),"score":s,"note":n}
def t15_saturation_distribution(img):
    rgb=np.array(img.convert("RGB")).astype(float)
    mx=np.max(rgb,axis=-1); mn=np.min(rgb,axis=-1)
    sat=(mx-mn)/(mx+1e-9); sat_flat=sat.ravel()  # HSV-style saturation per pixel
    k=float(sp_kurt(sat_flat,fisher=True))
    if k>3: s,n=-0.2,f"Natural saturation (κ={k:.2f})"
    elif k<1: s,n=0.3,f"Unusual saturation ({k:.2f})"
    else: s,n=0.0,f"Saturation κ={k:.2f}"
    return {"test":"Saturation Distribution","kurtosis":round(k,3),"score":s,"note":n}
def t16_luminance_gradient_ratio(img):
    gray=_g(img); gx=np.abs(np.diff(gray,axis=1)); gy=np.abs(np.diff(gray,axis=0))
    hg=float(np.mean(gx)); vg=float(np.mean(gy))
    ratio=hg/(vg+1e-9)
    if 0.7<ratio<1.4: s,n=-0.1,f"Balanced H/V gradients ({ratio:.3f})"
    elif ratio>2 or ratio<0.5: s,n=0.2,f"Extreme H/V bias ({ratio:.3f})"
    else: s,n=0.0,f"H/V ratio={ratio:.3f}"
    return {"test":"H/V Gradient Ratio","ratio":round(ratio,3),"score":s,"note":n}
def t17_pixel_uniqueness(img):
    gray=np.array(img.convert("L")); unique=len(np.unique(gray))
    ratio=unique/256  # fraction of the 256 possible 8-bit levels actually used
    if ratio>0.9: s,n=-0.1,f"Full tonal range ({unique} levels)"
    elif ratio<0.5: s,n=0.2,f"Limited range ({unique} levels)"
    else: s,n=0.0,f"{unique} levels"
    return {"test":"Pixel Uniqueness","levels":unique,"score":s,"note":n}
def t18_global_entropy(img):
    gray=np.array(img.convert("L")); hist,_=np.histogram(gray,bins=256,range=(0,256))
    hist=hist.astype(float); hist/=(hist.sum()+1e-9)
    ent=-float(np.sum(hist*np.log2(hist+1e-12)))
    if 6<ent<7.8: s,n=-0.2,f"Natural entropy ({ent:.3f})"
    elif ent<5: s,n=0.3,f"Low entropy ({ent:.3f})"
    else: s,n=0.0,f"Entropy={ent:.3f}"
    return {"test":"Global Entropy","entropy":round(ent,4),"score":s,"note":n}
def t19_power_law_fit(img):
    gray=_g(img); gm=np.hypot(sobel(gray,0),sobel(gray,1)).ravel()
    gm=gm[gm>1]; hist,edges=np.histogram(gm,bins=50); hist=hist.astype(float)+1
    centers=(edges[:-1]+edges[1:])/2; valid=hist>1
    if np.sum(valid)<5: return {"test":"Power Law Gradient","score":0.0,"note":"Insufficient"}
    try:
        c=np.polyfit(np.log(centers[valid]),np.log(hist[valid]),1); slope=float(c[0])
    except Exception:
        slope=0.0
    if -3<slope<-1: s,n=-0.2,f"Power-law gradients (α={slope:.2f})"
    elif slope>-0.5: s,n=0.3,f"Non-power-law ({slope:.2f})"
    else: s,n=0.0,f"Slope={slope:.2f}"
    return {"test":"Power Law Gradient","slope":round(slope,3),"score":s,"note":n}
def t20_contrast_distribution(img):
    gray=_g(img); h,w=gray.shape; bs=16
    contrasts=[]
    for i in range(0,h-bs,bs):
        for j in range(0,w-bs,bs):
            b=gray[i:i+bs,j:j+bs]; contrasts.append(float(np.max(b)-np.min(b)))
    c=np.array(contrasts)
    if len(c)<10: return {"test":"Contrast Distribution","score":0.0,"note":"Insufficient"}
    k=float(sp_kurt(c,fisher=True))
    if k>2: s,n=-0.2,f"Natural contrast variation (κ={k:.2f})"
    elif k<0.5: s,n=0.2,f"Uniform contrast ({k:.2f})"
    else: s,n=0.0,f"Contrast κ={k:.2f}"
    return {"test":"Contrast Distribution","kurtosis":round(k,3),"score":s,"note":n}
def t21_joint_histogram(img):
    rgb=np.array(img.convert("RGB")); r,g=rgb[:,:,0].ravel(),rgb[:,:,1].ravel()
    step=max(1,len(r)//100000)
    h2d,_,_=np.histogram2d(r[::step],g[::step],bins=32,range=[[0,256],[0,256]])
    h2d/=(h2d.sum()+1e-9)
    # Mutual information: MI(R;G) = Σ p(r,g) · log2( p(r,g) / (p(r)·p(g)) )
    hr=np.sum(h2d,axis=1); hg=np.sum(h2d,axis=0)
    mi=float(np.sum(h2d*np.log2(h2d/(np.outer(hr,hg)+1e-12)+1e-12)))
    if mi>0.5: s,n=-0.2,f"Natural color correlation (MI={mi:.3f})"
    elif mi<0.1: s,n=0.2,f"Weak color correlation ({mi:.3f})"
    else: s,n=0.0,f"MI={mi:.3f}"
    return {"test":"Joint Color Histogram","mi":round(mi,4),"score":s,"note":n}
def t22_run_length(img):
    gray=np.array(img.convert("L")); h,w=gray.shape
    # Sample 10 rows and 10 columns spread across the image
    all_runs=[]
    row_indices = np.linspace(0, h-1, min(10, h), dtype=int)
    col_indices = np.linspace(0, w-1, min(10, w), dtype=int)
    for ri in row_indices:
        row=gray[ri,:]; cur=1
        for i in range(1,len(row)):
            if row[i]==row[i-1]: cur+=1
            else: all_runs.append(cur); cur=1
        all_runs.append(cur)
    for ci in col_indices:
        col=gray[:,ci]; cur=1
        for i in range(1,len(col)):
            if col[i]==col[i-1]: cur+=1
            else: all_runs.append(cur); cur=1
        all_runs.append(cur)
    runs=np.array(all_runs)
    if len(runs)<10: return {"test":"Run Length Analysis","score":0.0,"note":"Insufficient data"}
    avg_run=float(np.mean(runs))
    if 1<avg_run<5: s,n=-0.2,f"Natural run lengths (avg={avg_run:.2f})"
    elif avg_run>10: s,n=0.3,f"Long runs ({avg_run:.2f}) — flat patches"
    else: s,n=0.0,f"Run avg={avg_run:.2f}"
    return {"test":"Run Length Analysis","avg_run":round(avg_run,3),"score":s,"note":n}
ALL_TESTS=[t01_dct_kurtosis,t02_benford,t03_gradient_sparsity,t04_local_kurtosis,t05_color_histogram,
t06_wavelet_kurtosis,t07_entropy_map,t08_edge_orientation,t09_lbp_distribution,t10_cooccurrence,
t11_block_variance,t12_gradient_magnitude,t13_spatial_correlation,t14_dct_skewness,
t15_saturation_distribution,t16_luminance_gradient_ratio,t17_pixel_uniqueness,t18_global_entropy,
t19_power_law_fit,t20_contrast_distribution,t21_joint_histogram,t22_run_length]
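# Lightweight sanity check so the registry stays in sync with the
# "22 features" promise in the module docstring.
assert len(ALL_TESTS) == 22, f"Expected 22 statistical tests, found {len(ALL_TESTS)}"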
def run_statistical_agent(img, modality_adjustments=None):
    from agents.utils import run_agent_tests  # local import, presumably to avoid a circular dependency
    findings, avg, conf, fail, rat = run_agent_tests(ALL_TESTS, img, "Statistical Priors Agent", modality_adjustments)
    return AgentEvidence("Statistical Priors Agent", np.clip(avg,-1,1), conf, fail, rat, findings)
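# Minimal usage sketch (assumes the agents package is importable and that
# "sample.jpg" is a stand-in path for a local image; neither ships with this file):
if __name__ == "__main__":
    demo = Image.open("sample.jpg")
    evidence = run_statistical_agent(demo)
    print(evidence)  # AgentEvidence from agents.optical_agent; repr depends on that class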