"""FORENSIQ — Statistical Priors Agent (22 features)"""
import numpy as np
from PIL import Image
from scipy.fftpack import dct
from scipy.stats import kurtosis as sp_kurt, skew as sp_skew
from scipy.ndimage import gaussian_filter, sobel
from agents.optical_agent import AgentEvidence
def _g(img): return np.array(img.convert("L")).astype(np.float64)  # grayscale pixels as float64
def _rgb(img): return np.array(img.convert("RGB")).astype(np.float64)  # RGB pixels as float64
def t01_dct_kurtosis(img):
    gray=_g(img); h,w=gray.shape; hc,wc=(h//8)*8,(w//8)*8; gray=gray[:hc,:wc]
    coeffs=[]
    for i in range(0,hc,8):
        for j in range(0,wc,8):
            b=gray[i:i+8,j:j+8]; d=dct(dct(b.T,norm="ortho").T,norm="ortho"); ac=d.copy(); ac[0,0]=0
            coeffs.extend(ac.ravel().tolist())
    c=np.array(coeffs); c=c[c!=0]
    if len(c)<100: return {"test":"DCT Kurtosis","score":0.0,"note":"Insufficient data"}
    k=float(sp_kurt(c,fisher=True))
    # Natural photos: excess kurtosis roughly 5-350. Above 400 indicates
    # pathological sharpness from AI over-sharpening; below 2 is near-Gaussian,
    # typical of older GAN artifacts.
    if k>400: s,n=0.3,f"Pathologically high DCT kurtosis (κ={k:.0f}) — AI over-sharpening"
    elif k>4.5: s,n=-0.4,f"Laplacian DCT (κ={k:.2f})"
    elif k<2.0: s,n=0.5,f"Gaussian DCT (κ={k:.2f})"
    elif k<3.5: s,n=0.2,f"Borderline (κ={k:.2f})"
    else: s,n=-0.1,f"Near-natural (κ={k:.2f})"
    return {"test":"DCT Kurtosis","kurtosis":round(k,4),"score":s,"note":n}
def t02_benford(img):
    gray=_g(img); h,w=gray.shape; hc,wc=(h//8)*8,(w//8)*8; gray=gray[:hc,:wc]
    coeffs=[]
    for i in range(0,hc,8):
        for j in range(0,wc,8):
            coeffs.extend(np.abs(dct(dct(gray[i:i+8,j:j+8].T,norm="ortho").T,norm="ortho").ravel()).tolist())
    c=np.array(coeffs); nz=c[c>0]
    if len(nz)<100: return {"test":"Benford's Law","score":0.0,"note":"Insufficient"}
    lv=np.floor(np.log10(nz+1e-12)); fd=np.floor(nz/(10**lv)).astype(int); fd=fd[(fd>=1)&(fd<=9)]
    obs=np.array([np.sum(fd==d) for d in range(1,10)],dtype=float); obs/=(obs.sum()+1e-9)
    ben=np.log10(1+1.0/np.arange(1,10))
    chi2=float(np.sum((obs-ben)**2/(ben+1e-9)))
    if chi2<0.005: s,n=-0.4,f"Excellent Benford fit (χ²={chi2:.5f})"
    elif chi2<0.02: s,n=-0.1,f"Good fit (χ²={chi2:.5f})"
    elif chi2<0.05: s,n=0.3,f"Moderate deviation (χ²={chi2:.5f})"
    else: s,n=0.6,f"Strong violation (χ²={chi2:.5f})"
    return {"test":"Benford's Law","chi2":round(chi2,6),"observed":obs.tolist(),"benford_expected":ben.tolist(),"score":s,"note":n}
def t03_gradient_sparsity(img):
    gray=_g(img); gx=np.diff(gray,axis=1).ravel(); gy=np.diff(gray,axis=0).ravel()
    ag=np.concatenate([gx,gy]); k=float(sp_kurt(ag,fisher=True))
    thr=np.std(ag)*0.1; sp=float(np.mean(np.abs(ag)<thr))
    if k>5 and sp>0.4: s,n=-0.4,f"Sparse gradients (κ={k:.2f}, sp={sp:.2f})"
    elif k<2: s,n=0.5,f"Low kurtosis ({k:.2f})"
    elif k<3.5: s,n=0.2,f"Borderline ({k:.2f})"
    else: s,n=-0.1,f"Normal (κ={k:.2f})"
    return {"test":"Gradient Sparsity","kurtosis":round(k,4),"sparsity":round(sp,4),"score":s,"note":n}
def t04_local_kurtosis(img):
    gray=_g(img); h,w=gray.shape; bs=32; hc,wc=(h//bs)*bs,(w//bs)*bs; gray=gray[:hc,:wc]
    lk=[]
    for i in range(0,hc,bs):
        for j in range(0,wc,bs):
            b=gray[i:i+bs,j:j+bs].ravel()
            if np.std(b)>1: lk.append(float(sp_kurt(b,fisher=True)))
    if len(lk)<10: return {"test":"Local Kurtosis Map","score":0.0,"note":"Insufficient"}
    std=float(np.std(lk))
    if std>3: s,n=-0.3,f"High kurtosis variation (σ={std:.2f})"
    elif std<1: s,n=0.4,f"Uniform statistics (σ={std:.2f})"
    else: s,n=0.0,f"Moderate (σ={std:.2f})"
    return {"test":"Local Kurtosis Map","kurtosis_std":round(std,4),"score":s,"note":n}
def t05_color_histogram(img):
    rgb=np.array(img.convert("RGB")); scores=[]
    for c in range(3):
        h,_=np.histogram(rgb[:,:,c].ravel(),bins=256,range=(0,256))
        sm=gaussian_filter(h.astype(float),2)
        scores.append(float(np.mean(np.abs(h-sm))/(np.mean(h)+1e-9)))
    avg=float(np.mean(scores))
    if avg<0.3: s,n=-0.2,f"Smooth histograms ({avg:.3f})"
    elif avg>0.8: s,n=0.4,f"Irregular histograms ({avg:.3f})"
    else: s,n=0.0,f"Histogram smoothness={avg:.3f}"
    return {"test":"Color Histogram","smoothness":round(avg,4),"score":s,"note":n}
def t06_wavelet_kurtosis(img):
    gray=_g(img); h,w=gray.shape; h2,w2=h//2*2,w//2*2; gray=gray[:h2,:w2]
    lh=(gray[0::2,0::2]+gray[0::2,1::2]-gray[1::2,0::2]-gray[1::2,1::2])/4
    hl=(gray[0::2,0::2]-gray[0::2,1::2]+gray[1::2,0::2]-gray[1::2,1::2])/4
    hh=(gray[0::2,0::2]-gray[0::2,1::2]-gray[1::2,0::2]+gray[1::2,1::2])/4
    hf=np.concatenate([lh.ravel(),hl.ravel(),hh.ravel()]); hf=hf[hf!=0]
    if len(hf)<100: return {"test":"Wavelet Kurtosis","score":0.0,"note":"Insufficient"}
    k=float(sp_kurt(hf,fisher=True))
    # Same ceiling logic as t01: AI over-sharpening produces kurtosis > 60
    if k>60: s,n=0.2,f"Pathologically high wavelet kurtosis (κ={k:.1f}) — AI over-sharpening"
    elif k>5: s,n=-0.3,f"Heavy-tailed wavelets (κ={k:.2f})"
    elif k<1.5: s,n=0.4,f"Gaussian wavelets (κ={k:.2f})"
    else: s,n=0.0,f"Wavelet κ={k:.2f}"
    return {"test":"Wavelet Kurtosis","kurtosis":round(k,4),"score":s,"note":n}
def t07_entropy_map(img):
    gray=_g(img); h,w=gray.shape; bs=32; ents=[]
    for i in range(0,h-bs,bs):
        for j in range(0,w-bs,bs):
            b=gray[i:i+bs,j:j+bs].ravel().astype(int)
            h_,_=np.histogram(b,bins=64,range=(0,256)); h_=h_.astype(float); h_/=(h_.sum()+1e-9)
            ents.append(-float(np.sum(h_*np.log2(h_+1e-12))))
    if len(ents)<4: return {"test":"Entropy Map","score":0.0,"note":"Too small"}
    std=float(np.std(ents)); mn=float(np.mean(ents))
    if std>0.5: s,n=-0.2,f"Varied local entropy (σ={std:.2f})"
    elif std<0.15: s,n=0.3,f"Uniform entropy (σ={std:.2f})"
    else: s,n=0.0,f"Entropy σ={std:.2f}"
    return {"test":"Entropy Map","entropy_std":round(std,4),"mean":round(mn,4),"score":s,"note":n}
def t08_edge_orientation(img):
    gray=_g(img); gx=sobel(gray,1); gy=sobel(gray,0); mag=np.hypot(gx,gy)
    strong=mag>np.percentile(mag,80); angles=np.arctan2(gy[strong],gx[strong])
    hist,_=np.histogram(angles,bins=36,range=(-np.pi,np.pi)); hist=hist.astype(float); hist/=(hist.sum()+1e-9)
    ent=-float(np.sum(hist*np.log(hist+1e-9)))
    norm_ent=ent/np.log(36)  # normalize by the maximum possible 36-bin entropy
    if norm_ent<0.85: s,n=-0.2,f"Directional edges (entropy={norm_ent:.3f})"
    elif norm_ent>0.95: s,n=0.2,f"Isotropic edges ({norm_ent:.3f})"
    else: s,n=0.0,f"Edge entropy={norm_ent:.3f}"
    return {"test":"Edge Orientation","entropy":round(norm_ent,4),"score":s,"note":n}
def t09_lbp_distribution(img):
    gray=np.array(img.convert("L")); h,w=gray.shape
    # Simplified 8-neighbor LBP via shifted comparisons
    lbp=np.zeros((h-2,w-2),dtype=int)
    for dy,dx,bit in [(-1,-1,0),(-1,0,1),(-1,1,2),(0,1,3),(1,1,4),(1,0,5),(1,-1,6),(0,-1,7)]:
        lbp|=((gray[1+dy:h-1+dy,1+dx:w-1+dx]>=gray[1:h-1,1:w-1]).astype(int)<<bit)
    hist,_=np.histogram(lbp.ravel(),bins=256,range=(0,256)); hist=hist.astype(float); hist/=(hist.sum()+1e-9)
    # Uniform LBP patterns (≤2 circular transitions) dominate in natural images
    uniform=0
    for v in range(256):
        b=format(v,'08b'); t=sum(1 for i in range(7) if b[i]!=b[i+1])+int(b[0]!=b[7])
        if t<=2: uniform+=hist[v]
    if uniform>0.6: s,n=-0.2,f"Natural LBP (uniform={uniform:.2%})"
    elif uniform<0.3: s,n=0.3,f"Non-uniform LBP ({uniform:.2%})"
    else: s,n=0.0,f"LBP uniform={uniform:.2%}"
    return {"test":"LBP Distribution","uniform_ratio":round(uniform,4),"score":s,"note":n}
def t10_cooccurrence(img):
    gray=(np.array(img.convert("L"))//16).astype(int); h,w=gray.shape
    # Vectorized GLCM over horizontal pixel pairs
    glcm=np.zeros((16,16))
    np.add.at(glcm,(gray[:,:-1].ravel(),gray[:,1:].ravel()),1)
    glcm/=(glcm.sum()+1e-9)
    energy=float(np.sum(glcm**2))
    I,J=np.mgrid[0:16,0:16]; homog=float(np.sum(glcm/(1+np.abs(I-J))))
    if energy<0.05 and homog>0.5: s,n=-0.2,f"Natural texture (E={energy:.4f}, H={homog:.3f})"
    elif energy>0.2: s,n=0.3,f"Flat/repetitive (E={energy:.4f})"
    else: s,n=0.0,f"GLCM E={energy:.4f}, H={homog:.3f}"
    return {"test":"Co-occurrence Matrix","energy":round(energy,4),"homogeneity":round(homog,4),"score":s,"note":n}
def t11_block_variance(img):
    gray=_g(img); h,w=gray.shape; bs=8; hc,wc=(h//bs)*bs,(w//bs)*bs
    gray=gray[:hc,:wc]; bvars=[]
    for i in range(0,hc,bs):
        for j in range(0,wc,bs):
            bvars.append(float(np.var(gray[i:i+bs,j:j+bs])))
    bv=np.array(bvars)
    # ANOVA-like test: coefficient of variation of the per-block variances
    vov=float(np.std(bv))/(float(np.mean(bv))+1e-9)
    if vov>1: s,n=-0.2,f"Varied block variance (VoV={vov:.3f})"
    elif vov<0.3: s,n=0.3,f"Uniform block variance ({vov:.3f})"
    else: s,n=0.0,f"VoV={vov:.3f}"
    return {"test":"Block Variance ANOVA","vov":round(vov,4),"score":s,"note":n}
def t12_gradient_magnitude(img):
    gray=_g(img); gm=np.hypot(sobel(gray,0),sobel(gray,1))
    k=float(sp_kurt(gm.ravel(),fisher=True)); sk=float(sp_skew(gm.ravel()))
    if k>5: s,n=-0.2,f"Heavy-tailed gradients (κ={k:.2f})"
    elif k<2: s,n=0.3,f"Light-tailed ({k:.2f})"
    else: s,n=0.0,f"Gradient κ={k:.2f}"
    return {"test":"Gradient Magnitude Dist","kurtosis":round(k,3),"skewness":round(sk,3),"score":s,"note":n}
def t13_spatial_correlation(img):
    gray=_g(img); h,w=gray.shape; step=max(1,h*w//200000)
    ac1=float(np.corrcoef(gray[:,:-1].ravel()[::step],gray[:,1:].ravel()[::step])[0,1])
    ac5=float(np.corrcoef(gray[:,:-5].ravel()[::step],gray[:,5:].ravel()[::step])[0,1])
    decay=ac1-ac5
    if 0.05<decay<0.3: s,n=-0.2,f"Natural correlation decay ({decay:.3f})"
    elif decay<0.01: s,n=0.3,f"Flat correlation ({decay:.3f})"
    else: s,n=0.0,f"Decay={decay:.3f}"
    return {"test":"Spatial Correlation Decay","decay":round(decay,4),"score":s,"note":n}
def t14_dct_skewness(img):
    gray=_g(img); h,w=gray.shape; hc,wc=(h//8)*8,(w//8)*8; gray=gray[:hc,:wc]
    coeffs=[]
    for i in range(0,hc,8):
        for j in range(0,wc,8):
            d=dct(dct(gray[i:i+8,j:j+8].T,norm="ortho").T,norm="ortho"); ac=d.copy(); ac[0,0]=0
            coeffs.extend(ac.ravel().tolist())
    c=np.array(coeffs); c=c[c!=0]
    if len(c)<100: return {"test":"DCT Skewness","score":0.0,"note":"Insufficient"}
    sk=float(sp_skew(c))
    if abs(sk)<0.1: s,n=-0.2,f"Symmetric DCT (skew={sk:.3f})"
    elif abs(sk)>0.5: s,n=0.3,f"Skewed DCT ({sk:.3f})"
    else: s,n=0.0,f"DCT skew={sk:.3f}"
    return {"test":"DCT Skewness","skewness":round(sk,4),"score":s,"note":n}
def t15_saturation_distribution(img):
    rgb=np.array(img.convert("RGB")).astype(float)
    mx=np.max(rgb,axis=-1); mn=np.min(rgb,axis=-1)
    sat=(mx-mn)/(mx+1e-9); sat_flat=sat.ravel()
    k=float(sp_kurt(sat_flat,fisher=True))
    if k>3: s,n=-0.2,f"Natural saturation (κ={k:.2f})"
    elif k<1: s,n=0.3,f"Unusual saturation ({k:.2f})"
    else: s,n=0.0,f"Saturation κ={k:.2f}"
    return {"test":"Saturation Distribution","kurtosis":round(k,3),"score":s,"note":n}
def t16_luminance_gradient_ratio(img):
    gray=_g(img); gx=np.abs(np.diff(gray,axis=1)); gy=np.abs(np.diff(gray,axis=0))
    hg=float(np.mean(gx)); vg=float(np.mean(gy))
    ratio=hg/(vg+1e-9)
    if 0.7<ratio<1.4: s,n=-0.1,f"Balanced H/V gradients ({ratio:.3f})"
    elif ratio>2 or ratio<0.5: s,n=0.2,f"Extreme H/V bias ({ratio:.3f})"
    else: s,n=0.0,f"H/V ratio={ratio:.3f}"
    return {"test":"H/V Gradient Ratio","ratio":round(ratio,3),"score":s,"note":n}
def t17_pixel_uniqueness(img):
    gray=np.array(img.convert("L")); unique=len(np.unique(gray))
    ratio=unique/256
    if ratio>0.9: s,n=-0.1,f"Full tonal range ({unique} levels)"
    elif ratio<0.5: s,n=0.2,f"Limited range ({unique} levels)"
    else: s,n=0.0,f"{unique} levels"
    return {"test":"Pixel Uniqueness","levels":unique,"score":s,"note":n}
def t18_global_entropy(img):
    gray=np.array(img.convert("L")); hist,_=np.histogram(gray,bins=256,range=(0,256))
    hist=hist.astype(float); hist/=(hist.sum()+1e-9)
    ent=-float(np.sum(hist*np.log2(hist+1e-12)))
    if 6<ent<7.8: s,n=-0.2,f"Natural entropy ({ent:.3f})"
    elif ent<5: s,n=0.3,f"Low entropy ({ent:.3f})"
    else: s,n=0.0,f"Entropy={ent:.3f}"
    return {"test":"Global Entropy","entropy":round(ent,4),"score":s,"note":n}
def t19_power_law_fit(img):
    gray=_g(img); gm=np.hypot(sobel(gray,0),sobel(gray,1)).ravel()
    gm=gm[gm>1]; hist,edges=np.histogram(gm,bins=50); hist=hist.astype(float)+1
    centers=(edges[:-1]+edges[1:])/2; valid=hist>1
    if np.sum(valid)<5: return {"test":"Power Law Gradient","score":0.0,"note":"Insufficient"}
    try:
        c=np.polyfit(np.log(centers[valid]),np.log(hist[valid]),1); slope=float(c[0])
    except Exception:
        slope=0.0
    if -3<slope<-1: s,n=-0.2,f"Power-law gradients (α={slope:.2f})"
    elif slope>-0.5: s,n=0.3,f"Non-power-law ({slope:.2f})"
    else: s,n=0.0,f"Slope={slope:.2f}"
    return {"test":"Power Law Gradient","slope":round(slope,3),"score":s,"note":n}
def t20_contrast_distribution(img):
    gray=_g(img); h,w=gray.shape; bs=16
    contrasts=[]
    for i in range(0,h-bs,bs):
        for j in range(0,w-bs,bs):
            b=gray[i:i+bs,j:j+bs]; contrasts.append(float(np.max(b)-np.min(b)))
    c=np.array(contrasts)
    if len(c)<10: return {"test":"Contrast Distribution","score":0.0,"note":"Insufficient"}
    k=float(sp_kurt(c,fisher=True))
    if k>2: s,n=-0.2,f"Natural contrast variation (κ={k:.2f})"
    elif k<0.5: s,n=0.2,f"Uniform contrast ({k:.2f})"
    else: s,n=0.0,f"Contrast κ={k:.2f}"
    return {"test":"Contrast Distribution","kurtosis":round(k,3),"score":s,"note":n}
def t21_joint_histogram(img):
    rgb=np.array(img.convert("RGB")); r,g=rgb[:,:,0].ravel(),rgb[:,:,1].ravel()
    step=max(1,len(r)//100000)
    h2d,_,_=np.histogram2d(r[::step],g[::step],bins=32,range=[[0,256],[0,256]])
    h2d/=(h2d.sum()+1e-9)
    # Mutual information between the R and G channels
    hr=np.sum(h2d,axis=1); hg=np.sum(h2d,axis=0)
    mi=float(np.sum(h2d*np.log2(h2d/(np.outer(hr,hg)+1e-12)+1e-12)))
    if mi>0.5: s,n=-0.2,f"Natural color correlation (MI={mi:.3f})"
    elif mi<0.1: s,n=0.2,f"Weak color correlation ({mi:.3f})"
    else: s,n=0.0,f"MI={mi:.3f}"
    return {"test":"Joint Color Histogram","mi":round(mi,4),"score":s,"note":n}
def t22_run_length(img):
    gray=np.array(img.convert("L")); h,w=gray.shape
    # Sample 10 rows and 10 columns spread across the image
    all_runs=[]
    row_indices=np.linspace(0,h-1,min(10,h),dtype=int)
    col_indices=np.linspace(0,w-1,min(10,w),dtype=int)
    for ri in row_indices:
        row=gray[ri,:]; cur=1
        for i in range(1,len(row)):
            if row[i]==row[i-1]: cur+=1
            else: all_runs.append(cur); cur=1
        all_runs.append(cur)
    for ci in col_indices:
        col=gray[:,ci]; cur=1
        for i in range(1,len(col)):
            if col[i]==col[i-1]: cur+=1
            else: all_runs.append(cur); cur=1
        all_runs.append(cur)
    runs=np.array(all_runs)
    if len(runs)<10: return {"test":"Run Length Analysis","score":0.0,"note":"Insufficient data"}
    avg_run=float(np.mean(runs))
    if 1<avg_run<5: s,n=-0.2,f"Natural run lengths (avg={avg_run:.2f})"
    elif avg_run>10: s,n=0.3,f"Long runs ({avg_run:.2f}) — flat patches"
    else: s,n=0.0,f"Run avg={avg_run:.2f}"
    return {"test":"Run Length Analysis","avg_run":round(avg_run,3),"score":s,"note":n}
ALL_TESTS=[t01_dct_kurtosis,t02_benford,t03_gradient_sparsity,t04_local_kurtosis,t05_color_histogram,
           t06_wavelet_kurtosis,t07_entropy_map,t08_edge_orientation,t09_lbp_distribution,t10_cooccurrence,
           t11_block_variance,t12_gradient_magnitude,t13_spatial_correlation,t14_dct_skewness,
           t15_saturation_distribution,t16_luminance_gradient_ratio,t17_pixel_uniqueness,t18_global_entropy,
           t19_power_law_fit,t20_contrast_distribution,t21_joint_histogram,t22_run_length]
def run_statistical_agent(img, modality_adjustments=None):
    # AgentEvidence is imported at module top; run_agent_tests stays local
    from agents.utils import run_agent_tests
    findings, avg, conf, fail, rat = run_agent_tests(ALL_TESTS, img, "Statistical Priors Agent", modality_adjustments)
    return AgentEvidence("Statistical Priors Agent", np.clip(avg,-1,1), conf, fail, rat, findings)
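# Minimal usage sketch, assuming the FORENSIQ package layout (agents.utils,
# agents.optical_agent) is importable and that "photo.jpg" exists; both the
# path and the plain print are placeholders, not part of this module.
if __name__ == "__main__":
    demo_img = Image.open("photo.jpg")
    evidence = run_statistical_agent(demo_img)
    print(evidence)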