Spaces:

anky2002
/

FORENSIQ

Running

App Files Files Community

FORENSIQ / agents /modality_detector.py

anky2002

feat: overhaul screenshot detection + suppression + explanation text (v7)

7f80464 verified 13 days ago

raw

history blame contribute delete

20.6 kB

	"""
	FORENSIQ — Capture Modality Detector v7

	v7: Overhauled SCREENSHOT detection — now detects OS screenshots (pixel-perfect,
	standard resolution, few unique colors, no Bayer CFA, uniform sharpness) in
	addition to photographed screens (H/V edge dominance). Expanded SCREENSHOT
	suppression from 15 to 30+ tests covering lens physics, sensor characteristics,
	LCD pixel grid frequency artifacts (Diffusion Notches), and UI color distribution
	violations (Benford's Law, Color Histogram).

	v6: Added Vignetting cos⁴θ, Bokeh Shape, and Fixed Pattern Noise ×0.3 suppression
	for MACRO_DSLR modality.

	v5: Added minimum bayer_margin threshold (0.005) to prevent JPEG re-encoding
	from creating false Bayer CFA patterns on AI images.
	"""

	import sys
	import numpy as np
	from PIL import Image
	from scipy.ndimage import gaussian_filter, sobel
	from scipy.signal import convolve2d
	from dataclasses import dataclass


	@dataclass
	class ModalityResult:
	    """Outcome of capture-modality detection for a single image."""

	    # Winning modality label, e.g. "SCREENSHOT", "MACRO_DSLR" or "UNKNOWN".
	    modality: str
	    # Confidence attached to ``modality``; the detector caps it at 1.0.
	    confidence: float
	    # Raw measurements and boolean flags gathered while deciding.
	    indicators: dict
	    # Per-test multipliers keyed by forensic test name.
	    score_adjustments: dict


	# Minimum Bayer CFA margin — below this, the "Bayer" signal is likely
	# JPEG encoding artifacts, not a real camera sensor signature.
	# The margin is min(red noise, blue noise) - green noise, where "noise" is
	# the std-dev of each channel's residual after a Gaussian blur; the detector
	# requires the margin to be strictly greater than this value.
	# Real cameras: margin > 0.01. JPEG artifacts: margin < 0.001.
	MIN_BAYER_MARGIN = 0.005


	# Standard screen resolutions (and 2× Retina variants); matched in either
	# orientation against the image size.
	_STANDARD_SCREEN_RESOLUTIONS = frozenset({
	    (1920, 1080), (2560, 1440), (3840, 2160), (1366, 768), (1280, 720),
	    (1440, 900), (1680, 1050), (2560, 1600), (2880, 1800), (3024, 1964),
	    (1536, 864), (1600, 900), (3456, 2234), (2560, 1080), (3440, 1440),
	    (1280, 800), (1024, 768), (2048, 1536), (2732, 2048),
	})

	# Substrings looked for in the lower-cased EXIF Make/Model fields.
	_PHONE_BRANDS = (
	    "apple", "samsung", "google", "pixel", "huawei", "xiaomi", "oneplus",
	    "oppo", "vivo", "realme", "motorola", "lg", "nothing",
	)


	def _channel_noise_std(rgb):
	    """Return {"red","green","blue"} -> std-dev of each channel minus its sigma=1.5 Gaussian blur."""
	    noise = {}
	    for idx, name in enumerate(("red", "green", "blue")):
	        channel = rgb[:, :, idx]
	        noise[name] = float(np.std(channel - gaussian_filter(channel, sigma=1.5)))
	    return noise


	def _axis_aligned_edge_stats(gray):
	    """Return (hv_ratio, strong_edges_present) computed from Sobel gradients of ``gray``."""
	    grad_rows = sobel(gray, axis=0)
	    grad_cols = sobel(gray, axis=1)
	    edge_mag = np.hypot(grad_rows, grad_cols)
	    p95_edge = float(np.percentile(edge_mag, 95))
	    strong = edge_mag > p95_edge

	    abs_rows, abs_cols = np.abs(grad_rows), np.abs(grad_cols)
	    # A pixel is axis-aligned when one gradient component is more than 3x the other.
	    axis_aligned = (abs_cols > abs_rows * 3) | (abs_rows > abs_cols * 3)

	    # Only strong edges are counted in the numerator. Previously every
	    # axis-aligned pixel in the image was counted while the denominator was
	    # the strong-edge count, so the "ratio" routinely exceeded 1 and the
	    # > 0.6 test carried almost no information.
	    hv_ratio = float(np.sum(axis_aligned & strong)) / (float(np.sum(strong)) + 1e-9)

	    # H/V ratio is only meaningful if edges are genuinely strong (not noise).
	    strong_edges_present = p95_edge > max(5.0, float(np.median(edge_mag)) * 3)
	    return hv_ratio, strong_edges_present


	def _row_blockiness(gray):
	    """Return mean row-to-row step at 8-pixel row boundaries divided by the mean step elsewhere.

	    Returns 1.0 when the image, cropped to multiples of 8, is not larger than
	    16 pixels in both dimensions.
	    """
	    hc, wc = (gray.shape[0] // 8) * 8, (gray.shape[1] // 8) * 8
	    if hc <= 16 or wc <= 16:
	        return 1.0
	    cropped = gray[:hc, :wc]
	    # row_steps[i - 1] == mean(|row i - row i-1|) for i in 1..hc-1.
	    row_steps = np.mean(np.abs(np.diff(cropped, axis=0)), axis=1)
	    on_boundary = (np.arange(1, hc) % 8) == 0
	    boundary, interior = row_steps[on_boundary], row_steps[~on_boundary]
	    if boundary.size == 0 or interior.size == 0:
	        return 1.0
	    return float(np.mean(boundary)) / (float(np.mean(interior)) + 1e-9)


	def _decode_exif(img):
	    """Return EXIF as {tag name: str(value)[:200]}; empty when EXIF is absent or unreadable."""
	    from PIL.ExifTags import TAGS

	    reader = getattr(img, "_getexif", None)
	    try:
	        raw = (reader() if callable(reader) else None) or {}
	    except Exception:
	        # Best effort: unreadable EXIF is treated as no EXIF.
	        raw = {}

	    decoded = {}
	    for tag_id, value in raw.items():
	        tag_name = TAGS.get(tag_id, str(tag_id))
	        try:
	            decoded[tag_name] = str(value)[:200]
	        except Exception:
	            # Skip values that cannot be stringified.
	            continue
	    return decoded


	def detect_modality(img: Image.Image) -> ModalityResult:
	    """Detect capture modality from image content and metadata.

	    Args:
	        img: Image to classify. Its ``size``, its ``convert("L")`` and
	            ``convert("RGB")`` pixels, its ``format`` attribute and, when
	            available, its EXIF tags are read.

	    Returns:
	        ModalityResult whose ``modality`` is one of "MACRO_DSLR",
	        "PORTRAIT_MODE", "SCREENSHOT", "SMARTPHONE", "DSLR", "MESSAGING" or
	        "UNKNOWN"; ``confidence`` is the winning score capped at 1.0 (0.2 for
	        "UNKNOWN"); ``indicators`` holds the measurements taken along the way;
	        ``score_adjustments`` holds the per-test multipliers for the modality.

	    Side effects:
	        Writes ``[MODALITY]`` debug lines to stderr.
	    """
	    indicators = {}
	    scores = {}

	    w, h = img.size
	    gray = np.array(img.convert("L")).astype(np.float64)
	    rgb = np.array(img.convert("RGB")).astype(np.float64)

	    # ═══ CRITICAL PRE-CHECK: Bayer CFA pattern ═══════════════════════
	    noise_std = _channel_noise_std(rgb)
	    bayer_margin = min(noise_std["red"], noise_std["blue"]) - noise_std["green"]
	    has_bayer = (noise_std["green"] < min(noise_std["red"], noise_std["blue"])
	                 and bayer_margin > MIN_BAYER_MARGIN)

	    indicators["has_bayer"] = has_bayer
	    indicators["bayer_margin"] = round(bayer_margin, 4)
	    indicators["bayer_margin_threshold"] = MIN_BAYER_MARGIN
	    indicators["noise_g"] = round(noise_std["green"], 3)
	    indicators["noise_r"] = round(noise_std["red"], 3)
	    indicators["noise_b"] = round(noise_std["blue"], 3)

	    # ═══ CONTENT-BASED DETECTION ═════════════════════════════════════

	    # ── Sharpness analysis ────────────────────────────────────────────
	    # Local sharpness map: |Laplacian| smoothed at a scale tied to image size.
	    laplace_kernel = np.array([[0, 1, 0], [1, -4, 1], [0, 1, 0]], dtype=np.float64)
	    laplacian = convolve2d(gray, laplace_kernel, mode="same", boundary="symm")
	    sharpness = gaussian_filter(np.abs(laplacian), sigma=max(10, min(h, w) // 80))

	    p25, p50, p75, p95 = (float(v) for v in np.percentile(sharpness, [25, 50, 75, 95]))

	    bimodal_ratio = (p75 - p25) / (p50 + 1e-9)
	    sharp_frac = float(np.mean(sharpness > p75))

	    # "Blurred" = below 20% of the 95th-percentile sharpness.
	    blur_region = sharpness < p95 * 0.20
	    blur_frac = float(np.mean(blur_region))
	    any_blur = bool(np.any(blur_region))

	    blur_vals = sharpness[blur_region] if any_blur else np.array([1])
	    blur_uniformity = 1.0 - min(float(np.std(blur_vals)) / (float(np.mean(blur_vals)) + 1e-9), 1.0)

	    # How abrupt the sharp-to-blur boundary is: p99 gradient of the map over its mean.
	    sharpness_grad = np.hypot(sobel(sharpness, 0), sobel(sharpness, 1))
	    max_grad = float(np.percentile(sharpness_grad, 99))
	    mean_grad = float(np.mean(sharpness_grad))
	    transition = max_grad / (mean_grad + 1e-9)

	    has_detail = p95 > 5.0

	    indicators["p95_sharpness"] = round(p95, 2)
	    indicators["bimodal_ratio"] = round(bimodal_ratio, 3)
	    indicators["blur_frac"] = round(blur_frac, 3)
	    indicators["blur_uniformity"] = round(blur_uniformity, 3)
	    indicators["transition_abruptness"] = round(transition, 2)
	    indicators["has_detail"] = has_detail

	    # ── Portrait mode detection ───────────────────────────────────────
	    # Every portrait cue requires both real detail and a Bayer signature.
	    portrait_score = 0.0
	    if has_detail and has_bayer:
	        if bimodal_ratio > 1.0:
	            portrait_score += 0.25
	        if blur_uniformity > 0.5:
	            portrait_score += 0.2
	        if transition > 4.0:
	            portrait_score += 0.2
	        if blur_frac > 0.2 and sharp_frac > 0.1:
	            portrait_score += 0.15

	    if portrait_score > 0.3:
	        scores["PORTRAIT_MODE"] = portrait_score
	        indicators["portrait_detected"] = True

	    # ── Macro/DSLR shallow DoF detection ─────────────────────────────
	    rows, cols = gray.shape
	    center_region = sharpness[rows // 4:3 * rows // 4, cols // 4:3 * cols // 4]
	    if center_region.size == 0:
	        # Images under 4 px on a side have an empty central crop; use the whole map.
	        center_region = sharpness
	    # The four border bands overlap at the corners, as in the original measurement.
	    edge_region = np.concatenate([
	        sharpness[:rows // 4, :].ravel(),
	        sharpness[3 * rows // 4:, :].ravel(),
	        sharpness[:, :cols // 4].ravel(),
	        sharpness[:, 3 * cols // 4:].ravel(),
	    ])
	    center_sharp = float(np.percentile(center_region, 90))
	    edge_sharp = float(np.mean(edge_region))
	    sharpness_ratio = center_sharp / (edge_sharp + 1e-9)

	    blur_region_pixels = gray[blur_region] if any_blur else np.array([128])
	    bg_color_std = float(np.std(blur_region_pixels))

	    indicators["sharpness_ratio"] = round(sharpness_ratio, 2)
	    indicators["bg_color_std"] = round(bg_color_std, 2)
	    indicators["center_sharp_p90"] = round(center_sharp, 2)
	    indicators["edge_sharp_mean"] = round(edge_sharp, 2)

	    # ── Macro scoring (GATED by sharpness_ratio > 3.0) ───────────────
	    # NOTE(review): the source's indentation was lost; the component checks are
	    # placed inside the gate to match the "GATED" header — confirm upstream.
	    macro_score = 0.0
	    macro_components = []
	    macro_gate_passed = has_detail and sharpness_ratio > 3.0

	    if macro_gate_passed:
	        macro_score += 0.25
	        macro_components.append(f"ratio={sharpness_ratio:.1f}")

	        if blur_frac > 0.25:
	            macro_score += 0.15
	            macro_components.append(f"blur={blur_frac:.2f}")
	        if bimodal_ratio > 1.5:
	            macro_score += 0.20
	            macro_components.append(f"bimodal={bimodal_ratio:.2f}")
	        if bg_color_std < 40:
	            macro_score += 0.15
	            macro_components.append(f"bg_std={bg_color_std:.1f}")
	        if blur_uniformity > 0.6:
	            macro_score += 0.15
	            macro_components.append(f"blur_uni={blur_uniformity:.2f}")

	    indicators["macro_score"] = round(macro_score, 3)
	    indicators["macro_components"] = macro_components
	    indicators["macro_gate_passed"] = macro_gate_passed

	    # A strong macro score stands alone; a moderate one also needs Bayer evidence.
	    if macro_score >= 0.55 or (macro_score >= 0.4 and has_bayer):
	        scores["MACRO_DSLR"] = macro_score
	        indicators["macro_detected"] = True

	    # ── Screenshot detection ──────────────────────────────────────────
	    # Two types: (1) OS screenshot (PrintScreen / Cmd+Shift shortcuts) —
	    #                pixel-perfect, no lens physics, exact resolution match,
	    #                few unique colors
	    #            (2) Photographed screen — has lens physics but also display
	    #                grid artifacts

	    # H/V edge dominance (UI elements are rectilinear)
	    hv_ratio, strong_edges_present = _axis_aligned_edge_stats(gray)
	    rectilinear_edges = hv_ratio > 0.6 and strong_edges_present

	    is_standard_res = ((w, h) in _STANDARD_SCREEN_RESOLUTIONS
	                       or (h, w) in _STANDARD_SCREEN_RESOLUTIONS)

	    # Count unique values per channel — UI screenshots use few exact colors
	    max_unique = max(len(np.unique(rgb[:, :, c].astype(np.uint8))) for c in range(3))
	    low_color_count = max_unique < 32  # Pure UI screenshots use <30 distinct values per channel

	    # Uniform sharpness — no blur gradient (unlike camera photos)
	    sharpness_cv = float(np.std(sharpness)) / (float(np.mean(sharpness)) + 1e-9)
	    uniform_sharpness = sharpness_cv < 0.5

	    screenshot_score = 0.0
	    screenshot_traits = 0  # Count distinct UI traits for gating

	    # Hard gate: images with real Bayer CFA pattern are from cameras, not screenshots
	    if not has_bayer:
	        # OS screenshot path — pixel-perfect, no lens physics
	        if is_standard_res:
	            screenshot_score += 0.2
	            screenshot_traits += 1
	            indicators["standard_resolution"] = True
	        if low_color_count:
	            screenshot_score += 0.25
	            screenshot_traits += 1
	            indicators["low_color_count"] = max_unique
	        if rectilinear_edges:
	            screenshot_score += 0.25
	            screenshot_traits += 1
	        if uniform_sharpness:
	            screenshot_score += 0.15
	        if low_color_count:
	            screenshot_score += 0.1  # No sensor + few colors = UI render

	    # Gate: require at least 2 distinct UI traits AND at least one must be
	    # content-based (colors or edges), not just resolution match.
	    # This prevents smooth AI images at 1920×1080 from false-triggering.
	    has_content_trait = low_color_count or rectilinear_edges
	    if screenshot_traits < 2 or not has_content_trait:
	        screenshot_score = 0.0

	    if screenshot_score >= 0.4:
	        scores["SCREENSHOT"] = min(1.0, screenshot_score)
	        indicators["screenshot_detected"] = True

	    indicators["hv_ratio"] = round(hv_ratio, 3)
	    indicators["max_unique_colors"] = max_unique
	    indicators["sharpness_cv"] = round(sharpness_cv, 3)

	    # ── Double JPEG detection (messaging) ─────────────────────────────
	    blockiness = _row_blockiness(gray)
	    indicators["blockiness"] = round(blockiness, 3)

	    # ═══ METADATA-BASED DETECTION ═════════════════════════════════════

	    decoded = _decode_exif(img)

	    has_exif = bool(decoded)
	    indicators["has_exif"] = has_exif
	    indicators["format"] = getattr(img, 'format', None)

	    make = decoded.get("Make", "").lower()
	    model = decoded.get("Model", "").lower()
	    if any(brand in make or brand in model for brand in _PHONE_BRANDS):
	        scores["SMARTPHONE"] = scores.get("SMARTPHONE", 0) + 0.4
	        indicators["phone_brand"] = True

	    # DSLR: at least 3 of Make / Model / lens / FocalLength, and the lens is mandatory.
	    has_lens = "LensModel" in decoded or "LensInfo" in decoded
	    cam_fields = sum(["Make" in decoded, "Model" in decoded, has_lens, "FocalLength" in decoded])
	    if cam_fields >= 3 and has_lens:
	        scores["DSLR"] = scores.get("DSLR", 0) + 0.5

	    if not has_exif and max(w, h) <= 1600:
	        scores["MESSAGING"] = scores.get("MESSAGING", 0) + 0.3
	        indicators["no_exif_low_res"] = True
	    # NOTE(review): treated as independent of the no-EXIF check above; the
	    # source's indentation was lost, so confirm the nesting upstream.
	    if blockiness > 1.3:
	        scores["MESSAGING"] = scores.get("MESSAGING", 0) + 0.2
	        indicators["double_jpeg"] = True

	    # ═══ DETERMINE MODALITY ═══════════════════════════════════════════

	    if not scores:
	        modality = "UNKNOWN"
	        conf = 0.2
	    else:
	        modality = max(scores, key=scores.get)
	        conf = min(1.0, scores[modality])

	        # Depth-of-field modalities take precedence over the highest raw score.
	        if scores.get("MACRO_DSLR", 0) >= 0.4:
	            modality = "MACRO_DSLR"
	            conf = min(1.0, scores["MACRO_DSLR"])
	        elif scores.get("PORTRAIT_MODE", 0) > 0.3:
	            modality = "PORTRAIT_MODE"
	            conf = min(1.0, scores["PORTRAIT_MODE"])

	    # SAFETY GUARD 1: No detail = possible AI
	    # Exception: SCREENSHOT modality is legitimately low-detail (UI renders exact colors,
	    # not photographic texture). If screenshot signals are strong, let it through.
	    if not has_detail:
	        if modality != "SCREENSHOT":
	            modality = "UNKNOWN"
	            conf = 0.2
	            indicators["safety_override"] = "Low-detail image — suppression disabled"
	        elif scores.get("SCREENSHOT", 0) < 0.6:
	            # Weak screenshot signal + no detail → still override
	            modality = "UNKNOWN"
	            conf = 0.2
	            indicators["safety_override"] = "Low-detail, weak screenshot signal — suppression disabled"

	    # SAFETY GUARD 2: No real Bayer = not a real camera
	    if not has_bayer and modality in ("PORTRAIT_MODE", "SMARTPHONE", "MESSAGING"):
	        modality = "UNKNOWN"
	        conf = 0.2
	        indicators["safety_override"] = f"No Bayer CFA (margin={bayer_margin:.4f} < {MIN_BAYER_MARGIN}) — suppression disabled"
	    elif not has_bayer and modality == "MACRO_DSLR" and scores.get("MACRO_DSLR", 0) < 0.55:
	        modality = "UNKNOWN"
	        conf = 0.2
	        indicators["safety_override"] = f"Macro weak ({scores.get('MACRO_DSLR', 0):.2f}) + no Bayer — suppression disabled"

	    # ═══ DEBUG LOGGING ════════════════════════════════════════════════
	    print(f"[MODALITY] detected={modality} conf={conf:.2f} scores={scores}", file=sys.stderr)
	    print(f"[MODALITY] has_bayer={has_bayer} margin={bayer_margin:.4f} (min={MIN_BAYER_MARGIN})", file=sys.stderr)
	    print(f"[MODALITY] macro_score={macro_score:.3f} gate={macro_gate_passed} components={macro_components}", file=sys.stderr)
	    print(f"[MODALITY] sharpness_ratio={sharpness_ratio:.2f} bimodal={bimodal_ratio:.3f} blur_frac={blur_frac:.3f} blur_uni={blur_uniformity:.3f} bg_std={bg_color_std:.2f}", file=sys.stderr)
	    print(f"[MODALITY] p95={p95:.2f} has_detail={has_detail}", file=sys.stderr)
	    if indicators.get("safety_override"):
	        print(f"[MODALITY] SAFETY OVERRIDE: {indicators['safety_override']}", file=sys.stderr)

	    # ═══ BUILD ADJUSTMENTS ════════════════════════════════════════════

	    adjustments = _get_modality_adjustments(modality)

	    # A shallow-DoF photo that also looks re-shared gets the stricter (smaller)
	    # multiplier of the two modalities for every test MESSAGING lists.
	    if modality in ("PORTRAIT_MODE", "MACRO_DSLR") and scores.get("MESSAGING", 0) > 0.15:
	        for test_name, factor in _get_modality_adjustments("MESSAGING").items():
	            adjustments[test_name] = min(adjustments.get(test_name, 1.0), factor)
	        indicators["dual_modality"] = f"{modality} + MESSAGING"

	    indicators["modality_scores"] = {k: round(v, 3) for k, v in scores.items()}

	    return ModalityResult(modality, round(conf, 3), indicators, adjustments)


	# Per-modality multipliers, keyed by forensic test name. Tests not listed for
	# a modality get no entry.
	_MODALITY_ADJUSTMENTS = {
	    "MACRO_DSLR": {
	        "Autocorrelation Peak": 0.1, "Texture Repetition": 0.1, "DoF Consistency": 0.1,
	        "Bayer CFA Pattern": 0.3, "CFA Nyquist": 0.3, "PRNU Uniformity": 0.2,
	        "Demosaic Interpolation": 0.4, "DCT Kurtosis": 0.1, "Wavelet Kurtosis": 0.1,
	        "Spectral Slope 1/f²": 0.4, "Spectral Symmetry": 0.3, "Phase Coherence": 0.4,
	        "Noise Spatial Frequency": 0.2, "Poisson-Gaussian Model": 0.2,
	        "HF Noise Structure": 0.3, "Pixel Response Linearity": 0.4,
	        "Saturation Clipping": 0.4, "VAE Patch Boundaries": 0.3,
	        "Vignetting cos⁴θ": 0.3, "Bokeh Shape": 0.3, "Fixed Pattern Noise": 0.3,
	    },
	    "PORTRAIT_MODE": {
	        "Autocorrelation Peak": 0.1, "Texture Repetition": 0.1,
	        "VAE Patch Boundaries": 0.2, "PRNU Uniformity": 0.15,
	        "Poisson-Gaussian Model": 0.3, "DoF Consistency": 0.2,
	        "Vignetting cos⁴θ": 0.3, "HF Noise Structure": 0.3,
	        "Noise Spatial Frequency": 0.3, "CFA Nyquist": 0.25,
	        "Spectral Slope 1/f²": 0.5, "Spectral Symmetry": 0.4,
	        "Phase Coherence": 0.4, "Pixel Response Linearity": 0.3,
	        "Demosaic Interpolation": 0.4, "Saturation Clipping": 0.4,
	    },
	    "MESSAGING": {
	        "EXIF Completeness": 0.15, "Compression Ghosts": 0.2,
	        "ICC Color Profile": 0.2, "Maker Note": 0.2,
	        "Thumbnail Check": 0.2, "Software Detection": 0.2,
	        "JPEG Quantization": 0.3, "CFA Nyquist": 0.5,
	        "Watermark Detection": 0.2, "Demosaic Interpolation": 0.5,
	    },
	    "SCREENSHOT": {
	        # Optical physics — screenshots have no lens, suppress all lens tests
	        "Vignetting cos⁴θ": 0.1, "Vignetting Symmetry": 0.1,
	        "Lens Distortion": 0.1, "Field Curvature": 0.1,
	        "CA Magnitude": 0.1, "CA Radial Gradient": 0.1, "Lateral CA": 0.1,
	        "Purple Fringing": 0.1, "Bokeh Shape": 0.1,
	        "Sharpness Falloff": 0.1, "DoF Consistency": 0.1, "DoF Gradient Direction": 0.1,
	        "Diffraction Limit": 0.1, "Optical Center": 0.1,
	        # Sensor — screenshots have no camera sensor
	        "PRNU Uniformity": 0.1, "Bayer CFA Pattern": 0.1, "CFA Nyquist": 0.1,
	        "Hot/Dead Pixels": 0.1, "Noise Autocorrelation": 0.1, "Demosaic Interpolation": 0.1,
	        "PRNU Cross-Channel": 0.1, "Poisson-Gaussian Model": 0.1,
	        "Fixed Pattern Noise": 0.1, "Green Pixel Imbalance": 0.1,
	        # Generative model — LCD pixel grid creates false frequency artifacts
	        "Diffusion Notches": 0.15, "FFT Grid 8×8": 0.3, "FFT Grid 16×16": 0.3,
	        # Statistical — UI color distributions violate natural-image priors
	        "Benford's Law": 0.2, "Color Histogram": 0.2,
	        "DCT Kurtosis": 0.3, "Wavelet Kurtosis": 0.3,
	        "Gradient Sparsity": 0.3,
	        # Metadata — screenshots lack camera EXIF by definition
	        "EXIF Completeness": 0.1, "Maker Note": 0.1, "Thumbnail Check": 0.1,
	        "GPS Plausibility": 0.1, "ICC Color Profile": 0.2,
	    },
	    "SMARTPHONE": {
	        "Vignetting cos⁴θ": 0.5, "CFA Nyquist": 0.7,
	        "Poisson-Gaussian Model": 0.7, "Pixel Response Linearity": 0.4,
	        "Spectral Slope 1/f²": 0.7,
	    },
	}


	def _get_modality_adjustments(modality: str) -> dict:
	    """Return the test-name -> multiplier table for ``modality``.

	    Args:
	        modality: Modality label such as "MACRO_DSLR", "PORTRAIT_MODE",
	            "MESSAGING", "SCREENSHOT" or "SMARTPHONE".

	    Returns:
	        A new dict on every call, so callers may modify it freely. Any other
	        label (for example "UNKNOWN" or "DSLR") yields an empty dict.
	    """
	    return dict(_MODALITY_ADJUSTMENTS.get(modality, {}))