FORENSIQ / agents /modality_detector.py
anky2002's picture
feat: overhaul screenshot detection + suppression + explanation text (v7)
7f80464 verified
"""
FORENSIQ — Capture Modality Detector v7
v7: Overhauled SCREENSHOT detection — now detects OS screenshots (pixel-perfect,
standard resolution, few unique colors, no Bayer CFA, uniform sharpness) in
addition to photographed screens (H/V edge dominance). Expanded SCREENSHOT
suppression from 15 to 30+ tests covering lens physics, sensor characteristics,
LCD pixel grid frequency artifacts (Diffusion Notches), and UI color distribution
violations (Benford's Law, Color Histogram).
v6: Added Vignetting cos⁴θ, Bokeh Shape, and Fixed Pattern Noise ×0.3 suppression
for MACRO_DSLR modality.
v5: Added minimum bayer_margin threshold (0.005) to prevent JPEG re-encoding
from creating false Bayer CFA patterns on AI images.
"""
import sys
import numpy as np
from PIL import Image
from scipy.ndimage import gaussian_filter, sobel
from scipy.signal import convolve2d
from dataclasses import dataclass
@dataclass
class ModalityResult:
    """Outcome of capture-modality detection for a single image."""

    # Name of the detected modality (e.g. "UNKNOWN", "SCREENSHOT").
    modality: str
    # Confidence attached to ``modality``.
    confidence: float
    # Intermediate measurements and flags collected during detection.
    indicators: dict
    # Per-test adjustment factors, keyed by test name.
    score_adjustments: dict
# Minimum Bayer CFA margin — below this, the "Bayer" signal is likely
# JPEG encoding artifacts, not a real camera sensor signature.
# Real cameras: margin > 0.01. JPEG artifacts: margin < 0.001.
# The margin compared against this value in detect_modality is
# min(red noise std, blue noise std) - green noise std.
MIN_BAYER_MARGIN = 0.005
def detect_modality(img: Image.Image) -> ModalityResult:
    """Detect capture modality from image content and metadata.

    Pixel statistics (per-channel noise residual, local sharpness
    distribution, edge orientation, distinct colour levels, 8-row blockiness)
    and EXIF tags are turned into per-modality scores.  The winning modality,
    after two safety guards, selects the adjustment table returned by
    ``_get_modality_adjustments``.

    Args:
        img: PIL image to analyse.

    Returns:
        A ``ModalityResult`` whose ``modality`` is one of "UNKNOWN",
        "PORTRAIT_MODE", "MACRO_DSLR", "SCREENSHOT", "SMARTPHONE", "DSLR" or
        "MESSAGING"; ``confidence`` is the winning score capped at 1.0 (0.2
        for "UNKNOWN"), rounded to 3 decimals; ``indicators`` holds the
        intermediate measurements; ``score_adjustments`` holds the per-test
        factors for the modality.

    Side effects:
        Writes ``[MODALITY]`` debug lines to ``sys.stderr``.
    """
    indicators = {}
    scores = {}

    w, h = img.size
    gray = np.array(img.convert("L")).astype(np.float64)
    rgb = np.array(img.convert("RGB")).astype(np.float64)

    # ---- Critical pre-check: Bayer CFA pattern --------------------------
    # Noise residual per channel = channel minus its Gaussian-smoothed copy.
    noise_std = {}
    for c, nm in enumerate(["red", "green", "blue"]):
        ch = rgb[:, :, c]
        noise_std[nm] = float(np.std(ch - gaussian_filter(ch, sigma=1.5)))

    min_rb_noise = min(noise_std["red"], noise_std["blue"])
    bayer_margin = min_rb_noise - noise_std["green"]
    # Green must be the quietest channel AND the gap must clear the minimum
    # margin, otherwise the signal is treated as an encoding artifact.
    has_bayer = (noise_std["green"] < min_rb_noise
                 and bayer_margin > MIN_BAYER_MARGIN)

    indicators["has_bayer"] = has_bayer
    indicators["bayer_margin"] = round(bayer_margin, 4)
    indicators["bayer_margin_threshold"] = MIN_BAYER_MARGIN
    indicators["noise_g"] = round(noise_std["green"], 3)
    indicators["noise_r"] = round(noise_std["red"], 3)
    indicators["noise_b"] = round(noise_std["blue"], 3)

    # ---- Content-based detection: sharpness analysis --------------------
    lap = np.array([[0, 1, 0], [1, -4, 1], [0, 1, 0]], dtype=np.float64)
    laplacian = convolve2d(gray, lap, mode="same", boundary="symm")
    # Smoothed |Laplacian| acts as a local sharpness map.
    sharpness = gaussian_filter(np.abs(laplacian), sigma=max(10, min(h, w) // 80))

    p25 = float(np.percentile(sharpness, 25))
    p50 = float(np.percentile(sharpness, 50))
    p75 = float(np.percentile(sharpness, 75))
    p95 = float(np.percentile(sharpness, 95))
    bimodal_ratio = (p75 - p25) / (p50 + 1e-9)

    sharp_frac = float(np.mean(sharpness > p75))
    # "Blurred" = below 20% of the 95th-percentile sharpness.
    blur_region = sharpness < p95 * 0.20
    blur_frac = float(np.mean(blur_region))
    blur_vals = sharpness[blur_region] if np.any(blur_region) else np.array([1])
    blur_uniformity = 1.0 - min(
        float(np.std(blur_vals)) / (float(np.mean(blur_vals)) + 1e-9), 1.0
    )

    # How abrupt the sharp/blur boundary is: 99th percentile of the
    # sharpness-map gradient relative to its mean.
    sharpness_grad = np.hypot(sobel(sharpness, 0), sobel(sharpness, 1))
    max_grad = float(np.percentile(sharpness_grad, 99))
    mean_grad = float(np.mean(sharpness_grad))
    transition = max_grad / (mean_grad + 1e-9)

    has_detail = p95 > 5.0

    indicators["p95_sharpness"] = round(p95, 2)
    indicators["bimodal_ratio"] = round(bimodal_ratio, 3)
    indicators["blur_frac"] = round(blur_frac, 3)
    indicators["blur_uniformity"] = round(blur_uniformity, 3)
    indicators["transition_abruptness"] = round(transition, 2)
    indicators["has_detail"] = has_detail

    # ---- Portrait mode detection ----------------------------------------
    # Only images with detail and a Bayer signature can score here.
    can_be_portrait = has_detail and has_bayer
    portrait_score = 0.0
    if can_be_portrait and bimodal_ratio > 1.0:
        portrait_score += 0.25
    if can_be_portrait and blur_uniformity > 0.5:
        portrait_score += 0.2
    if can_be_portrait and transition > 4.0:
        portrait_score += 0.2
    if can_be_portrait and blur_frac > 0.2 and sharp_frac > 0.1:
        portrait_score += 0.15
    if portrait_score > 0.3 and has_bayer:
        scores["PORTRAIT_MODE"] = portrait_score
        indicators["portrait_detected"] = True

    # ---- Macro / DSLR shallow depth-of-field detection ------------------
    ch_s, cw_s = gray.shape
    center_region = sharpness[ch_s // 4:3 * ch_s // 4, cw_s // 4:3 * cw_s // 4]
    # Outer quarter bands on all four sides (corners are counted twice).
    edge_region = np.concatenate([
        sharpness[:ch_s // 4, :].ravel(),
        sharpness[3 * ch_s // 4:, :].ravel(),
        sharpness[:, :cw_s // 4].ravel(),
        sharpness[:, 3 * cw_s // 4:].ravel(),
    ])
    center_sharp = float(np.percentile(center_region, 90))
    edge_sharp = float(np.mean(edge_region))
    sharpness_ratio = center_sharp / (edge_sharp + 1e-9)

    blur_region_pixels = gray[blur_region] if np.any(blur_region) else np.array([128])
    bg_color_std = float(np.std(blur_region_pixels))

    indicators["sharpness_ratio"] = round(sharpness_ratio, 2)
    indicators["bg_color_std"] = round(bg_color_std, 2)
    indicators["center_sharp_p90"] = round(center_sharp, 2)
    indicators["edge_sharp_mean"] = round(edge_sharp, 2)

    # Macro scoring is gated: nothing is added unless the centre is much
    # sharper than the border (sharpness_ratio > 3.0) and detail exists.
    macro_score = 0.0
    macro_components = []
    macro_gate_passed = has_detail and sharpness_ratio > 3.0
    if macro_gate_passed:
        macro_score += 0.25
        macro_components.append(f"ratio={sharpness_ratio:.1f}")
        if blur_frac > 0.25:
            macro_score += 0.15
            macro_components.append(f"blur={blur_frac:.2f}")
        if bimodal_ratio > 1.5:
            macro_score += 0.20
            macro_components.append(f"bimodal={bimodal_ratio:.2f}")
        if bg_color_std < 40:
            macro_score += 0.15
            macro_components.append(f"bg_std={bg_color_std:.1f}")
        if blur_uniformity > 0.6:
            macro_score += 0.15
            macro_components.append(f"blur_uni={blur_uniformity:.2f}")

    indicators["macro_score"] = round(macro_score, 3)
    indicators["macro_components"] = macro_components
    indicators["macro_gate_passed"] = macro_gate_passed

    # A strong macro score stands alone; a weaker one needs Bayer support.
    if macro_score >= 0.55 or (macro_score >= 0.4 and has_bayer):
        scores["MACRO_DSLR"] = macro_score
        indicators["macro_detected"] = True

    # ---- Screenshot detection -------------------------------------------
    # Two kinds: (1) OS screenshot - pixel-perfect, standard resolution, few
    # unique colours; (2) photographed screen - rectilinear UI edges.
    gx = sobel(gray, axis=1)
    gy = sobel(gray, axis=0)
    edge_mag = np.hypot(gx, gy)
    strong_thresh = float(np.percentile(edge_mag, 95))
    strong = edge_mag > strong_thresh
    if not strong.any() and strong_thresh > 0:
        # More than 5% of pixels tie at the top magnitude (flat-colour UI):
        # include the ties rather than ending up with no strong edges.
        strong = edge_mag >= strong_thresh

    # A pixel is axis-aligned when one gradient component dominates 3:1.
    axis_aligned = (np.abs(gx) > np.abs(gy) * 3) | (np.abs(gy) > np.abs(gx) * 3)
    # Fraction of STRONG edge pixels that are axis-aligned.  The numerator is
    # restricted to strong pixels so the ratio stays within [0, 1]; counting
    # axis-aligned pixels over the whole image made the 0.6 threshold
    # trivially true.
    hv_ratio = float(np.sum(axis_aligned & strong)) / (float(np.sum(strong)) + 1e-9)

    # H/V ratio is only meaningful if edges are genuinely strong (not noise).
    median_edge = float(np.median(edge_mag))
    strong_edges_present = strong_thresh > max(5.0, median_edge * 3)
    rectilinear_edges = hv_ratio > 0.6 and strong_edges_present

    # Standard screen resolutions (and 2x Retina variants).
    standard_res = {
        (1920, 1080), (2560, 1440), (3840, 2160), (1366, 768), (1280, 720),
        (1440, 900), (1680, 1050), (2560, 1600), (2880, 1800), (3024, 1964),
        (1536, 864), (1600, 900), (3456, 2234), (2560, 1080), (3440, 1440),
        (1280, 800), (1024, 768), (2048, 1536), (2732, 2048),
    }
    is_standard_res = (w, h) in standard_res or (h, w) in standard_res

    # Distinct 8-bit levels per channel - UI renders use few exact values.
    unique_colors = [len(np.unique(rgb[:, :, c].astype(np.uint8))) for c in range(3)]
    max_unique = max(unique_colors)
    low_color_count = max_unique < 32

    # Uniform sharpness - no blur gradient (unlike camera photos).
    sharpness_cv = float(np.std(sharpness)) / (float(np.mean(sharpness)) + 1e-9)
    uniform_sharpness = sharpness_cv < 0.5

    screenshot_score = 0.0
    screenshot_traits = 0  # distinct UI traits, used for gating below
    # Hard gate: a real Bayer signature means camera, not screenshot.
    if not has_bayer:
        if is_standard_res:
            screenshot_score += 0.2
            screenshot_traits += 1
            indicators["standard_resolution"] = True
        if low_color_count:
            screenshot_score += 0.25
            screenshot_traits += 1
            indicators["low_color_count"] = max_unique
        if rectilinear_edges:
            screenshot_score += 0.25
            screenshot_traits += 1
        if uniform_sharpness:
            screenshot_score += 0.15
        if low_color_count:
            screenshot_score += 0.1  # no sensor + few colours = UI render

    # Require at least 2 distinct UI traits, one of them content-based
    # (colours or edges), so a smooth image that merely has a standard
    # resolution does not trigger.
    has_content_trait = low_color_count or rectilinear_edges
    if screenshot_traits < 2 or not has_content_trait:
        screenshot_score = 0.0

    if screenshot_score >= 0.4:
        scores["SCREENSHOT"] = min(1.0, screenshot_score)
        indicators["screenshot_detected"] = True
    indicators["hv_ratio"] = round(hv_ratio, 3)
    indicators["max_unique_colors"] = max_unique
    indicators["sharpness_cv"] = round(sharpness_cv, 3)

    # ---- Double-JPEG blockiness (messaging apps) ------------------------
    # Ratio of the mean row-to-row step at multiples of 8 rows to the mean
    # step at all other rows.
    hc, wc = (gray.shape[0] // 8) * 8, (gray.shape[1] // 8) * 8
    blockiness = 1.0
    if hc > 16 and wc > 16:
        g = gray[:hc, :wc]
        # row_step[i - 1] == mean(|g[i] - g[i - 1]|) for i in 1..hc-1
        row_step = np.abs(np.diff(g, axis=0)).mean(axis=1)
        on_boundary = (np.arange(1, hc) % 8) == 0
        boundary_steps = row_step[on_boundary]
        interior_steps = row_step[~on_boundary]
        if boundary_steps.size and interior_steps.size:
            blockiness = float(np.mean(boundary_steps)) / (
                float(np.mean(interior_steps)) + 1e-9
            )
    indicators["blockiness"] = round(blockiness, 3)

    # ---- Metadata-based detection ---------------------------------------
    # Best effort: not every image object exposes ``_getexif`` and parsing
    # can fail on malformed data; either case is treated as "no EXIF".
    exif = {}
    exif_reader = getattr(img, "_getexif", None)
    if callable(exif_reader):
        try:
            exif = exif_reader() or {}
        except Exception:
            exif = {}

    from PIL.ExifTags import TAGS
    decoded = {}
    for tid, v in exif.items():
        tag_name = TAGS.get(tid, str(tid))
        try:
            decoded[tag_name] = str(v)[:200]
        except Exception:
            # Skip values that cannot be stringified.
            continue

    has_exif = bool(decoded)
    indicators["has_exif"] = has_exif
    indicators["format"] = getattr(img, 'format', None)

    phone_brands = ["apple", "samsung", "google", "pixel", "huawei", "xiaomi", "oneplus",
                    "oppo", "vivo", "realme", "motorola", "lg", "nothing"]
    make = decoded.get("Make", "").lower()
    model = decoded.get("Model", "").lower()
    is_phone = any(b in make or b in model for b in phone_brands)
    if is_phone:
        scores["SMARTPHONE"] = scores.get("SMARTPHONE", 0) + 0.4
        indicators["phone_brand"] = True

    has_lens_tag = "LensModel" in decoded or "LensInfo" in decoded
    cam_fields = sum(["Make" in decoded, "Model" in decoded,
                      has_lens_tag, "FocalLength" in decoded])
    if cam_fields >= 3 and has_lens_tag:
        scores["DSLR"] = scores.get("DSLR", 0) + 0.5

    max_side = max(w, h)
    no_exif_low_res = not has_exif and max_side <= 1600
    if no_exif_low_res:
        scores["MESSAGING"] = scores.get("MESSAGING", 0) + 0.3
        indicators["no_exif_low_res"] = True
    if blockiness > 1.3:
        scores["MESSAGING"] = scores.get("MESSAGING", 0) + 0.2
        indicators["double_jpeg"] = True

    # ---- Determine modality ---------------------------------------------
    if not scores:
        modality = "UNKNOWN"
        conf = 0.2
    else:
        modality = max(scores, key=scores.get)
        conf = min(1.0, scores[modality])

    # Macro, then portrait, take precedence over the highest raw score.
    if scores.get("MACRO_DSLR", 0) >= 0.4:
        modality = "MACRO_DSLR"
        conf = min(1.0, scores["MACRO_DSLR"])
    elif scores.get("PORTRAIT_MODE", 0) > 0.3:
        modality = "PORTRAIT_MODE"
        conf = min(1.0, scores["PORTRAIT_MODE"])

    # Safety guard 1: no detail = possible AI image.  SCREENSHOT is
    # legitimately low-detail, so it survives when its score is >= 0.6.
    if not has_detail:
        if modality != "SCREENSHOT":
            modality = "UNKNOWN"
            conf = 0.2
            indicators["safety_override"] = "Low-detail image — suppression disabled"
        elif scores.get("SCREENSHOT", 0) < 0.6:
            modality = "UNKNOWN"
            conf = 0.2
            indicators["safety_override"] = "Low-detail, weak screenshot signal — suppression disabled"

    # Safety guard 2: no real Bayer signature = not a real camera.
    if not has_bayer and modality in ("PORTRAIT_MODE", "SMARTPHONE", "MESSAGING"):
        modality = "UNKNOWN"
        conf = 0.2
        indicators["safety_override"] = f"No Bayer CFA (margin={bayer_margin:.4f} < {MIN_BAYER_MARGIN}) — suppression disabled"
    elif not has_bayer and modality == "MACRO_DSLR" and scores.get("MACRO_DSLR", 0) < 0.55:
        modality = "UNKNOWN"
        conf = 0.2
        indicators["safety_override"] = f"Macro weak ({scores.get('MACRO_DSLR', 0):.2f}) + no Bayer — suppression disabled"

    # ---- Debug logging ---------------------------------------------------
    print(f"[MODALITY] detected={modality} conf={conf:.2f} scores={scores}", file=sys.stderr)
    print(f"[MODALITY] has_bayer={has_bayer} margin={bayer_margin:.4f} (min={MIN_BAYER_MARGIN})", file=sys.stderr)
    print(f"[MODALITY] macro_score={macro_score:.3f} gate={macro_gate_passed} components={macro_components}", file=sys.stderr)
    print(f"[MODALITY] sharpness_ratio={sharpness_ratio:.2f} bimodal={bimodal_ratio:.3f} blur_frac={blur_frac:.3f} blur_uni={blur_uniformity:.3f} bg_std={bg_color_std:.2f}", file=sys.stderr)
    print(f"[MODALITY] p95={p95:.2f} has_detail={has_detail}", file=sys.stderr)
    if indicators.get("safety_override"):
        print(f"[MODALITY] SAFETY OVERRIDE: {indicators['safety_override']}", file=sys.stderr)

    # ---- Build adjustments ----------------------------------------------
    adjustments = _get_modality_adjustments(modality)
    # Portrait/macro images that also look re-shared through a messaging app
    # take, per test, the smaller of the two factors.
    if modality in ("PORTRAIT_MODE", "MACRO_DSLR") and scores.get("MESSAGING", 0) > 0.15:
        for k, v in _get_modality_adjustments("MESSAGING").items():
            adjustments[k] = min(adjustments.get(k, 1.0), v)
        indicators["dual_modality"] = f"{modality} + MESSAGING"

    indicators["modality_scores"] = {k: round(v, 3) for k, v in scores.items()}
    return ModalityResult(modality, round(conf, 3), indicators, adjustments)
# Per-modality adjustment factors keyed by forensic test name.  Test names
# use the real characters (², ×, ⁴, θ); mis-encoded spellings would not match
# names written with those characters.
_MODALITY_ADJUSTMENTS = {
    "MACRO_DSLR": {
        "Autocorrelation Peak": 0.1, "Texture Repetition": 0.1, "DoF Consistency": 0.1,
        "Bayer CFA Pattern": 0.3, "CFA Nyquist": 0.3, "PRNU Uniformity": 0.2,
        "Demosaic Interpolation": 0.4, "DCT Kurtosis": 0.1, "Wavelet Kurtosis": 0.1,
        "Spectral Slope 1/f²": 0.4, "Spectral Symmetry": 0.3, "Phase Coherence": 0.4,
        "Noise Spatial Frequency": 0.2, "Poisson-Gaussian Model": 0.2,
        "HF Noise Structure": 0.3, "Pixel Response Linearity": 0.4,
        "Saturation Clipping": 0.4, "VAE Patch Boundaries": 0.3,
        "Vignetting cos⁴θ": 0.3, "Bokeh Shape": 0.3, "Fixed Pattern Noise": 0.3,
    },
    "PORTRAIT_MODE": {
        "Autocorrelation Peak": 0.1, "Texture Repetition": 0.1,
        "VAE Patch Boundaries": 0.2, "PRNU Uniformity": 0.15,
        "Poisson-Gaussian Model": 0.3, "DoF Consistency": 0.2,
        "Vignetting cos⁴θ": 0.3, "HF Noise Structure": 0.3,
        "Noise Spatial Frequency": 0.3, "CFA Nyquist": 0.25,
        "Spectral Slope 1/f²": 0.5, "Spectral Symmetry": 0.4,
        "Phase Coherence": 0.4, "Pixel Response Linearity": 0.3,
        "Demosaic Interpolation": 0.4, "Saturation Clipping": 0.4,
    },
    "MESSAGING": {
        "EXIF Completeness": 0.15, "Compression Ghosts": 0.2,
        "ICC Color Profile": 0.2, "Maker Note": 0.2,
        "Thumbnail Check": 0.2, "Software Detection": 0.2,
        "JPEG Quantization": 0.3, "CFA Nyquist": 0.5,
        "Watermark Detection": 0.2, "Demosaic Interpolation": 0.5,
    },
    "SCREENSHOT": {
        # Optical physics - screenshots have no lens, suppress all lens tests
        "Vignetting cos⁴θ": 0.1, "Vignetting Symmetry": 0.1,
        "Lens Distortion": 0.1, "Field Curvature": 0.1,
        "CA Magnitude": 0.1, "CA Radial Gradient": 0.1, "Lateral CA": 0.1,
        "Purple Fringing": 0.1, "Bokeh Shape": 0.1,
        "Sharpness Falloff": 0.1, "DoF Consistency": 0.1, "DoF Gradient Direction": 0.1,
        "Diffraction Limit": 0.1, "Optical Center": 0.1,
        # Sensor - screenshots have no camera sensor
        "PRNU Uniformity": 0.1, "Bayer CFA Pattern": 0.1, "CFA Nyquist": 0.1,
        "Hot/Dead Pixels": 0.1, "Noise Autocorrelation": 0.1, "Demosaic Interpolation": 0.1,
        "PRNU Cross-Channel": 0.1, "Poisson-Gaussian Model": 0.1,
        "Fixed Pattern Noise": 0.1, "Green Pixel Imbalance": 0.1,
        # Generative model - LCD pixel grid creates false frequency artifacts
        "Diffusion Notches": 0.15, "FFT Grid 8×8": 0.3, "FFT Grid 16×16": 0.3,
        # Statistical - UI color distributions violate natural-image priors
        "Benford's Law": 0.2, "Color Histogram": 0.2,
        "DCT Kurtosis": 0.3, "Wavelet Kurtosis": 0.3,
        "Gradient Sparsity": 0.3,
        # Metadata - screenshots lack camera EXIF by definition
        "EXIF Completeness": 0.1, "Maker Note": 0.1, "Thumbnail Check": 0.1,
        "GPS Plausibility": 0.1, "ICC Color Profile": 0.2,
    },
    "SMARTPHONE": {
        "Vignetting cos⁴θ": 0.5, "CFA Nyquist": 0.7,
        "Poisson-Gaussian Model": 0.7, "Pixel Response Linearity": 0.4,
        "Spectral Slope 1/f²": 0.7,
    },
}


def _get_modality_adjustments(modality: str) -> dict:
    """Return the per-test adjustment factors for *modality*.

    Args:
        modality: Modality name such as "MACRO_DSLR", "PORTRAIT_MODE",
            "MESSAGING", "SCREENSHOT" or "SMARTPHONE".

    Returns:
        A new ``{test name: factor}`` dict; empty for any modality without a
        table (e.g. "UNKNOWN" or "DSLR").  A fresh copy is returned on every
        call because ``detect_modality`` mutates the result when merging two
        modalities.
    """
    return dict(_MODALITY_ADJUSTMENTS.get(modality, {}))