"""FORENSIQ — Statistical Priors Agent (22 features)"""
import numpy as np
from PIL import Image
from scipy.fftpack import dct
from scipy.stats import kurtosis as sp_kurt, skew as sp_skew
from scipy.ndimage import gaussian_filter, sobel
from agents.optical_agent import AgentEvidence
def _g(img): return np.array(img.convert("L")).astype(np.float64)
def _rgb(img): return np.array(img.convert("RGB")).astype(np.float64)
def t01_dct_kurtosis(img):
    gray=_g(img); h,w=gray.shape; hc,wc=(h//8)*8,(w//8)*8; gray=gray[:hc,:wc]
    coeffs=[]
    for i in range(0,hc,8):
        for j in range(0,wc,8):
            b=gray[i:i+8,j:j+8]; d=dct(dct(b.T,norm="ortho").T,norm="ortho"); ac=d.copy(); ac[0,0]=0
            coeffs.extend(ac.ravel().tolist())
    c=np.array(coeffs); c=c[c!=0]
    if len(c)<100: return {"test":"DCT Kurtosis","score":0.0,"note":"Insufficient data"}
    k=float(sp_kurt(c,fisher=True))
    # Natural photos: kurtosis 5-350. AI over-sharpening: >400.
    # Too-low kurtosis (<2) = Gaussian = old GAN artifacts.
    # Too-high kurtosis (>400) = pathological sharpness = modern AI.
    if k>400: s,n=0.3,f"Pathologically high DCT kurtosis (κ={k:.0f}) — AI over-sharpening"
    elif k>4.5: s,n=-0.4,f"Laplacian DCT (κ={k:.2f})"
    elif k<2.0: s,n=0.5,f"Gaussian DCT (κ={k:.2f})"
    elif k<3.5: s,n=0.2,f"Borderline (κ={k:.2f})"
    else: s,n=-0.1,f"Near-natural (κ={k:.2f})"
    return {"test":"DCT Kurtosis","kurtosis":round(k,4),"score":s,"note":n}
def t02_benford(img):
    gray=_g(img); h,w=gray.shape; hc,wc=(h//8)*8,(w//8)*8; gray=gray[:hc,:wc]
    coeffs=[]
    for i in range(0,hc,8):
        for j in range(0,wc,8):
            coeffs.extend(np.abs(dct(dct(gray[i:i+8,j:j+8].T,norm="ortho").T,norm="ortho").ravel()).tolist())
    c=np.array(coeffs); nz=c[c>0]
    if len(nz)<100: return {"test":"Benford's Law","score":0.0,"note":"Insufficient"}
    lv=np.floor(np.log10(nz+1e-12)); fd=np.floor(nz/(10**lv)).astype(int); fd=fd[(fd>=1)&(fd<=9)]
    obs=np.array([np.sum(fd==d) for d in range(1,10)],dtype=float); obs/=(obs.sum()+1e-9)
    ben=np.log10(1+1.0/np.arange(1,10))
    # Chi-square-like statistic on relative frequencies rather than raw counts,
    # so the thresholds below are calibrated to proportions, not classical χ² tables.
    chi2=float(np.sum((obs-ben)**2/(ben+1e-9)))
    if chi2<0.005: s,n=-0.4,f"Excellent Benford fit (χ²={chi2:.5f})"
    elif chi2<0.02: s,n=-0.1,f"Good fit (χ²={chi2:.5f})"
    elif chi2<0.05: s,n=0.3,f"Moderate deviation (χ²={chi2:.5f})"
    else: s,n=0.6,f"Strong violation (χ²={chi2:.5f})"
    return {"test":"Benford's Law","chi2":round(chi2,6),"observed":obs.tolist(),"benford_expected":ben.tolist(),"score":s,"note":n}
def t03_gradient_sparsity(img):
    gray=_g(img); gx=np.diff(gray,axis=1).ravel(); gy=np.diff(gray,axis=0).ravel()
    ag=np.concatenate([gx,gy]); k=float(sp_kurt(ag,fisher=True))
    thr=np.std(ag)*0.1; sp=float(np.mean(np.abs(ag)<thr))
    if k>5 and sp>0.4: s,n=-0.4,f"Sparse gradients (κ={k:.2f}, sp={sp:.2f})"
    elif k<2: s,n=0.5,f"Low kurtosis ({k:.2f})"
    elif k<3.5: s,n=0.2,f"Borderline ({k:.2f})"
    else: s,n=-0.1,f"Normal (κ={k:.2f})"
    return {"test":"Gradient Sparsity","kurtosis":round(k,4),"sparsity":round(sp,4),"score":s,"note":n}
def t04_local_kurtosis(img):
    gray=_g(img); h,w=gray.shape; bs=32; hc,wc=(h//bs)*bs,(w//bs)*bs; gray=gray[:hc,:wc]
    lk=[]
    for i in range(0,hc,bs):
        for j in range(0,wc,bs):
            b=gray[i:i+bs,j:j+bs].ravel()
            if np.std(b)>1: lk.append(float(sp_kurt(b,fisher=True)))
    if len(lk)<10: return {"test":"Local Kurtosis Map","score":0.0,"note":"Insufficient"}
    std=float(np.std(lk))
    if std>3: s,n=-0.3,f"High kurtosis variation (σ={std:.2f})"
    elif std<1: s,n=0.4,f"Uniform statistics (σ={std:.2f})"
    else: s,n=0.0,f"Moderate (σ={std:.2f})"
    return {"test":"Local Kurtosis Map","kurtosis_std":round(std,4),"score":s,"note":n}
def t05_color_histogram(img):
    rgb=np.array(img.convert("RGB")); scores=[]
    for c in range(3):
        h,_=np.histogram(rgb[:,:,c].ravel(),bins=256,range=(0,256))
        sm=gaussian_filter(h.astype(float),2)
        scores.append(float(np.mean(np.abs(h-sm))/(np.mean(h)+1e-9)))
    avg=float(np.mean(scores))
    if avg<0.3: s,n=-0.2,f"Smooth histograms ({avg:.3f})"
    elif avg>0.8: s,n=0.4,f"Irregular histograms ({avg:.3f})"
    else: s,n=0.0,f"Histogram smoothness={avg:.3f}"
    return {"test":"Color Histogram","smoothness":round(avg,4),"score":s,"note":n}
def t06_wavelet_kurtosis(img):
    gray=_g(img); h,w=gray.shape; h2,w2=h//2*2,w//2*2; gray=gray[:h2,:w2]
    lh=(gray[0::2,0::2]+gray[0::2,1::2]-gray[1::2,0::2]-gray[1::2,1::2])/4
    hl=(gray[0::2,0::2]-gray[0::2,1::2]+gray[1::2,0::2]-gray[1::2,1::2])/4
    hh=(gray[0::2,0::2]-gray[0::2,1::2]-gray[1::2,0::2]+gray[1::2,1::2])/4
    hf=np.concatenate([lh.ravel(),hl.ravel(),hh.ravel()]); hf=hf[hf!=0]
    if len(hf)<100: return {"test":"Wavelet Kurtosis","score":0.0,"note":"Insufficient"}
    k=float(sp_kurt(hf,fisher=True))
    # Same ceiling logic as t01: AI over-sharpening produces kurtosis > 60
    if k>60: s,n=0.2,f"Pathologically high wavelet kurtosis (κ={k:.1f}) — AI over-sharpening"
    elif k>5: s,n=-0.3,f"Heavy-tailed wavelets (κ={k:.2f})"
    elif k<1.5: s,n=0.4,f"Gaussian wavelets (κ={k:.2f})"
    else: s,n=0.0,f"Wavelet κ={k:.2f}"
    return {"test":"Wavelet Kurtosis","kurtosis":round(k,4),"score":s,"note":n}
def t07_entropy_map(img):
    gray=_g(img); h,w=gray.shape; bs=32; ents=[]
    for i in range(0,h-bs,bs):
        for j in range(0,w-bs,bs):
            b=gray[i:i+bs,j:j+bs].ravel().astype(int)
            h_,_=np.histogram(b,bins=64,range=(0,256)); h_=h_.astype(float); h_/=(h_.sum()+1e-9)
            ents.append(-float(np.sum(h_*np.log2(h_+1e-12))))
    if len(ents)<4: return {"test":"Entropy Map","score":0.0,"note":"Too small"}
    std=float(np.std(ents)); mn=float(np.mean(ents))
    if std>0.5: s,n=-0.2,f"Varied local entropy (σ={std:.2f})"
    elif std<0.15: s,n=0.3,f"Uniform entropy (σ={std:.2f})"
    else: s,n=0.0,f"Entropy σ={std:.2f}"
    return {"test":"Entropy Map","entropy_std":round(std,4),"mean":round(mn,4),"score":s,"note":n}
def t08_edge_orientation(img):
    gray=_g(img); gx=sobel(gray,1); gy=sobel(gray,0); mag=np.hypot(gx,gy)
    strong=mag>np.percentile(mag,80); angles=np.arctan2(gy[strong],gx[strong])
    hist,_=np.histogram(angles,bins=36,range=(-np.pi,np.pi)); hist=hist.astype(float); hist/=(hist.sum()+1e-9)
    ent=-float(np.sum(hist*np.log(hist+1e-9)))
    max_ent=np.log(36)  # entropy of a perfectly flat 36-bin orientation histogram
    norm_ent=ent/max_ent
    if norm_ent<0.85: s,n=-0.2,f"Directional edges (entropy={norm_ent:.3f})"
    elif norm_ent>0.95: s,n=0.2,f"Isotropic edges ({norm_ent:.3f})"
    else: s,n=0.0,f"Edge entropy={norm_ent:.3f}"
    return {"test":"Edge Orientation","entropy":round(norm_ent,4),"score":s,"note":n}
def t09_lbp_distribution(img):
    gray=np.array(img.convert("L")); h,w=gray.shape
    # Simplified LBP: neighbors visited in circular order, so adjacent bits are adjacent pixels
    lbp=np.zeros((h-2,w-2),dtype=int)
    for dy,dx,bit in [(-1,-1,0),(-1,0,1),(-1,1,2),(0,1,3),(1,1,4),(1,0,5),(1,-1,6),(0,-1,7)]:
        lbp|=((gray[1+dy:h-1+dy,1+dx:w-1+dx]>=gray[1:h-1,1:w-1]).astype(int)<<bit)
    hist,_=np.histogram(lbp.ravel(),bins=256,range=(0,256)); hist=hist.astype(float); hist/=(hist.sum()+1e-9)
    # Uniform LBP patterns (≤2 transitions) dominate in natural images
    uniform=0
    for v in range(256):
        b=format(v,'08b'); t=sum(1 for i in range(7) if b[i]!=b[i+1])+int(b[0]!=b[7])
        if t<=2: uniform+=hist[v]
    if uniform>0.6: s,n=-0.2,f"Natural LBP (uniform={uniform:.2%})"
    elif uniform<0.3: s,n=0.3,f"Non-uniform LBP ({uniform:.2%})"
    else: s,n=0.0,f"LBP uniform={uniform:.2%}"
    return {"test":"LBP Distribution","uniform_ratio":round(uniform,4),"score":s,"note":n}
def t10_cooccurrence(img):
    gray=(np.array(img.convert("L"))//16).astype(int); h,w=gray.shape
    # Vectorized GLCM — horizontal adjacency
    glcm=np.zeros((16,16))
    np.add.at(glcm, (gray[:,:-1].ravel(), gray[:,1:].ravel()), 1)
    glcm/=(glcm.sum()+1e-9)
    energy=float(np.sum(glcm**2))
    I,J=np.mgrid[0:16,0:16]; homog=float(np.sum(glcm/(1+np.abs(I-J))))
    if energy<0.05 and homog>0.5: s,n=-0.2,f"Natural texture (E={energy:.4f}, H={homog:.3f})"
    elif energy>0.2: s,n=0.3,f"Flat/repetitive (E={energy:.4f})"
    else: s,n=0.0,f"GLCM E={energy:.4f}, H={homog:.3f}"
    return {"test":"Co-occurrence Matrix","energy":round(energy,4),"homogeneity":round(homog,4),"score":s,"note":n}
def t11_block_variance(img):
    gray=_g(img); h,w=gray.shape; bs=8; hc,wc=(h//bs)*bs,(w//bs)*bs
    gray=gray[:hc,:wc]; bvars=[]
    for i in range(0,hc,bs):
        for j in range(0,wc,bs):
            bvars.append(float(np.var(gray[i:i+bs,j:j+bs])))
    bv=np.array(bvars)
    # ANOVA-flavored dispersion check: coefficient of variation (std/mean)
    # of the per-block variances, reported as "VoV"
    vov=float(np.std(bv))/(float(np.mean(bv))+1e-9)
    if vov>1: s,n=-0.2,f"Varied block variance (VoV={vov:.3f})"
    elif vov<0.3: s,n=0.3,f"Uniform block variance ({vov:.3f})"
    else: s,n=0.0,f"VoV={vov:.3f}"
    return {"test":"Block Variance ANOVA","vov":round(vov,4),"score":s,"note":n}
def t12_gradient_magnitude(img):
    gray=_g(img); gm=np.hypot(sobel(gray,0),sobel(gray,1))
    k=float(sp_kurt(gm.ravel(),fisher=True)); sk=float(sp_skew(gm.ravel()))
    if k>5: s,n=-0.2,f"Heavy-tailed gradients (κ={k:.2f})"
    elif k<2: s,n=0.3,f"Light-tailed ({k:.2f})"
    else: s,n=0.0,f"Gradient κ={k:.2f}"
    return {"test":"Gradient Magnitude Dist","kurtosis":round(k,3),"skewness":round(sk,3),"score":s,"note":n}
def t13_spatial_correlation(img):
    gray=_g(img); h,w=gray.shape; step=max(1,h*w//200000)
    # Lag-1 and lag-5 horizontal autocorrelation on a subsampled pixel grid
    ac1=float(np.corrcoef(gray[:,:-1].ravel()[::step],gray[:,1:].ravel()[::step])[0,1])
    ac5=float(np.corrcoef(gray[:,:-5].ravel()[::step],gray[:,5:].ravel()[::step])[0,1])
    decay=ac1-ac5
    if 0.05<decay<0.3: s,n=-0.2,f"Natural correlation decay ({decay:.3f})"
    elif decay<0.01: s,n=0.3,f"Flat correlation ({decay:.3f})"
    else: s,n=0.0,f"Decay={decay:.3f}"
    return {"test":"Spatial Correlation Decay","decay":round(decay,4),"score":s,"note":n}
def t14_dct_skewness(img):
    gray=_g(img); h,w=gray.shape; hc,wc=(h//8)*8,(w//8)*8; gray=gray[:hc,:wc]
    coeffs=[]
    for i in range(0,hc,8):
        for j in range(0,wc,8):
            d=dct(dct(gray[i:i+8,j:j+8].T,norm="ortho").T,norm="ortho"); ac=d.copy(); ac[0,0]=0
            coeffs.extend(ac.ravel().tolist())
    c=np.array(coeffs); c=c[c!=0]
    if len(c)<100: return {"test":"DCT Skewness","score":0.0,"note":"Insufficient"}
    sk=float(sp_skew(c))
    if abs(sk)<0.1: s,n=-0.2,f"Symmetric DCT (skew={sk:.3f})"
    elif abs(sk)>0.5: s,n=0.3,f"Skewed DCT ({sk:.3f})"
    else: s,n=0.0,f"DCT skew={sk:.3f}"
    return {"test":"DCT Skewness","skewness":round(sk,4),"score":s,"note":n}
def t15_saturation_distribution(img):
    rgb=np.array(img.convert("RGB")).astype(float)
    mx=np.max(rgb,axis=-1); mn=np.min(rgb,axis=-1)
    sat=(mx-mn)/(mx+1e-9); sat_flat=sat.ravel()  # HSV-style saturation per pixel
    k=float(sp_kurt(sat_flat,fisher=True))
    if k>3: s,n=-0.2,f"Natural saturation (κ={k:.2f})"
    elif k<1: s,n=0.3,f"Unusual saturation ({k:.2f})"
    else: s,n=0.0,f"Saturation κ={k:.2f}"
    return {"test":"Saturation Distribution","kurtosis":round(k,3),"score":s,"note":n}
def t16_luminance_gradient_ratio(img):
    gray=_g(img); gx=np.abs(np.diff(gray,axis=1)); gy=np.abs(np.diff(gray,axis=0))
    hg=float(np.mean(gx)); vg=float(np.mean(gy))
    ratio=hg/(vg+1e-9)
    if 0.7<ratio<1.4: s,n=-0.1,f"Balanced H/V gradients ({ratio:.3f})"
    elif ratio>2 or ratio<0.5: s,n=0.2,f"Extreme H/V bias ({ratio:.3f})"
    else: s,n=0.0,f"H/V ratio={ratio:.3f}"
    return {"test":"H/V Gradient Ratio","ratio":round(ratio,3),"score":s,"note":n}
def t17_pixel_uniqueness(img):
    gray=np.array(img.convert("L")); unique=len(np.unique(gray))
    ratio=unique/256  # fraction of the 256 possible 8-bit levels actually used
    if ratio>0.9: s,n=-0.1,f"Full tonal range ({unique} levels)"
    elif ratio<0.5: s,n=0.2,f"Limited range ({unique} levels)"
    else: s,n=0.0,f"{unique} levels"
    return {"test":"Pixel Uniqueness","levels":unique,"score":s,"note":n}
def t18_global_entropy(img):
    gray=np.array(img.convert("L")); hist,_=np.histogram(gray,bins=256,range=(0,256))
    hist=hist.astype(float); hist/=(hist.sum()+1e-9)
    ent=-float(np.sum(hist*np.log2(hist+1e-12)))
    if 6<ent<7.8: s,n=-0.2,f"Natural entropy ({ent:.3f})"
    elif ent<5: s,n=0.3,f"Low entropy ({ent:.3f})"
    else: s,n=0.0,f"Entropy={ent:.3f}"
    return {"test":"Global Entropy","entropy":round(ent,4),"score":s,"note":n}
def t19_power_law_fit(img):
    gray=_g(img); gm=np.hypot(sobel(gray,0),sobel(gray,1)).ravel()
    gm=gm[gm>1]; hist,edges=np.histogram(gm,bins=50); hist=hist.astype(float)+1
    centers=(edges[:-1]+edges[1:])/2; valid=hist>1
    if np.sum(valid)<5: return {"test":"Power Law Gradient","score":0.0,"note":"Insufficient"}
    try:
        c=np.polyfit(np.log(centers[valid]),np.log(hist[valid]),1); slope=float(c[0])
    except Exception:
        slope=0.0
    if -3<slope<-1: s,n=-0.2,f"Power-law gradients (α={slope:.2f})"
    elif slope>-0.5: s,n=0.3,f"Non-power-law ({slope:.2f})"
    else: s,n=0.0,f"Slope={slope:.2f}"
    return {"test":"Power Law Gradient","slope":round(slope,3),"score":s,"note":n}
def t20_contrast_distribution(img):
    gray=_g(img); h,w=gray.shape; bs=16
    contrasts=[]
    for i in range(0,h-bs,bs):
        for j in range(0,w-bs,bs):
            b=gray[i:i+bs,j:j+bs]; contrasts.append(float(np.max(b)-np.min(b)))
    c=np.array(contrasts)
    if len(c)<10: return {"test":"Contrast Distribution","score":0.0,"note":"Insufficient"}
    k=float(sp_kurt(c,fisher=True))
    if k>2: s,n=-0.2,f"Natural contrast variation (κ={k:.2f})"
    elif k<0.5: s,n=0.2,f"Uniform contrast ({k:.2f})"
    else: s,n=0.0,f"Contrast κ={k:.2f}"
    return {"test":"Contrast Distribution","kurtosis":round(k,3),"score":s,"note":n}
def t21_joint_histogram(img):
    rgb=np.array(img.convert("RGB")); r,g=rgb[:,:,0].ravel(),rgb[:,:,1].ravel()
    step=max(1,len(r)//100000)
    h2d,_,_=np.histogram2d(r[::step],g[::step],bins=32,range=[[0,256],[0,256]])
    h2d/=(h2d.sum()+1e-9)
    # Mutual information: MI(R;G) = Σ p(r,g) · log2( p(r,g) / (p(r)·p(g)) )
    hr=np.sum(h2d,axis=1); hg=np.sum(h2d,axis=0)
    mi=float(np.sum(h2d*np.log2(h2d/(np.outer(hr,hg)+1e-12)+1e-12)))
    if mi>0.5: s,n=-0.2,f"Natural color correlation (MI={mi:.3f})"
    elif mi<0.1: s,n=0.2,f"Weak color correlation ({mi:.3f})"
    else: s,n=0.0,f"MI={mi:.3f}"
    return {"test":"Joint Color Histogram","mi":round(mi,4),"score":s,"note":n}
def t22_run_length(img):
    gray=np.array(img.convert("L")); h,w=gray.shape
    # Sample 10 rows and 10 columns spread across the image
    all_runs=[]
    row_indices = np.linspace(0, h-1, min(10, h), dtype=int)
    col_indices = np.linspace(0, w-1, min(10, w), dtype=int)
    for ri in row_indices:
        row=gray[ri,:]; cur=1
        for i in range(1,len(row)):
            if row[i]==row[i-1]: cur+=1
            else: all_runs.append(cur); cur=1
        all_runs.append(cur)
    for ci in col_indices:
        col=gray[:,ci]; cur=1
        for i in range(1,len(col)):
            if col[i]==col[i-1]: cur+=1
            else: all_runs.append(cur); cur=1
        all_runs.append(cur)
    runs=np.array(all_runs)
    if len(runs)<10: return {"test":"Run Length Analysis","score":0.0,"note":"Insufficient data"}
    avg_run=float(np.mean(runs))
    if 1<avg_run<5: s,n=-0.2,f"Natural run lengths (avg={avg_run:.2f})"
    elif avg_run>10: s,n=0.3,f"Long runs ({avg_run:.2f}) — flat patches"
    else: s,n=0.0,f"Run avg={avg_run:.2f}"
    return {"test":"Run Length Analysis","avg_run":round(avg_run,3),"score":s,"note":n}
ALL_TESTS=[t01_dct_kurtosis,t02_benford,t03_gradient_sparsity,t04_local_kurtosis,t05_color_histogram,
t06_wavelet_kurtosis,t07_entropy_map,t08_edge_orientation,t09_lbp_distribution,t10_cooccurrence,
t11_block_variance,t12_gradient_magnitude,t13_spatial_correlation,t14_dct_skewness,
t15_saturation_distribution,t16_luminance_gradient_ratio,t17_pixel_uniqueness,t18_global_entropy,
t19_power_law_fit,t20_contrast_distribution,t21_joint_histogram,t22_run_length]
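# Lightweight sanity check so the registry stays in sync with the
# "22 features" promise in the module docstring.
assert len(ALL_TESTS) == 22, f"Expected 22 statistical tests, found {len(ALL_TESTS)}"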
def run_statistical_agent(img, modality_adjustments=None):
    from agents.utils import run_agent_tests  # local import, presumably to avoid a circular dependency
    findings, avg, conf, fail, rat = run_agent_tests(ALL_TESTS, img, "Statistical Priors Agent", modality_adjustments)
    return AgentEvidence("Statistical Priors Agent", np.clip(avg,-1,1), conf, fail, rat, findings)
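# Minimal usage sketch (assumes the agents package is importable and that
# "sample.jpg" is a stand-in path for a local image; neither ships with this file):
if __name__ == "__main__":
    demo = Image.open("sample.jpg")
    evidence = run_statistical_agent(demo)
    print(evidence)  # AgentEvidence from agents.optical_agent; repr depends on that class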