anky2002 committed on
Commit
15d0ba2
·
verified ·
1 Parent(s): 7b95f04

Upload agents/modality_detector.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. agents/modality_detector.py +270 -0
agents/modality_detector.py ADDED
@@ -0,0 +1,270 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ FORENSIQ — Capture Modality Detector
3
+
4
+ Classifies images into capture modalities BEFORE forensic analysis.
5
+ Each modality has known false-positive patterns that agents must account for.
6
+
7
+ Modalities:
8
+ DSLR — Traditional camera, raw/JPEG from camera firmware
9
+ SMARTPHONE — Standard smartphone photo (no portrait mode)
10
+ PORTRAIT_MODE — Smartphone portrait mode (computational bokeh)
11
+ SCREENSHOT — Screen capture
12
+ MESSAGING — Compressed via WhatsApp/Telegram/etc (stripped metadata, double JPEG)
13
+ SOCIAL_MEDIA — Downloaded from Instagram/Facebook/Twitter (re-encoded, stripped)
14
+ UNKNOWN — Cannot determine
15
+ """
16
+
17
+ import numpy as np
18
+ from PIL import Image
19
+ from scipy.ndimage import gaussian_filter, sobel
20
+ from dataclasses import dataclass
21
+ from typing import Optional
22
+
23
+
24
@dataclass
class ModalityResult:
    """Outcome of capture-modality detection for a single image."""

    modality: str  # Primary modality classification
    confidence: float  # 0-1
    indicators: dict  # Evidence for the classification
    score_adjustments: dict  # Per-test score multipliers (1.0 = no change, 0.0 = suppress)
30
+
31
+
32
def detect_modality(img: Image.Image) -> ModalityResult:
    """Detect capture modality from image properties.

    Evidence is accumulated per modality from EXIF metadata, resolution and
    aspect ratio, a local-sharpness map, edge orientation and 8-row block
    boundary strength. The modality with the most evidence wins, except that
    PORTRAIT_MODE takes precedence when it co-occurs with SMARTPHONE evidence.

    Args:
        img: The image to classify.

    Returns:
        A ModalityResult with the chosen modality, a confidence clamped to
        at most 1.0, the collected indicators, and the per-test score
        multipliers for that modality. "UNKNOWN" with confidence 0.2 is
        returned when no evidence was found or the image has no pixels.
    """
    from PIL.ExifTags import TAGS
    from scipy.signal import convolve2d

    indicators = {}
    scores = {}  # modality -> evidence strength

    def add_evidence(name, amount):
        scores[name] = scores.get(name, 0) + amount

    w, h = img.size
    if w == 0 or h == 0:
        # Nothing to analyse; also avoids dividing by zero in the aspect ratio.
        return ModalityResult(
            modality="UNKNOWN",
            confidence=0.2,
            indicators={"megapixels": 0.0},
            score_adjustments=_get_modality_adjustments("UNKNOWN"),
        )

    # -- 1. Metadata analysis ------------------------------------------------
    # _getexif is looked up defensively because not every image object
    # provides it; a failure while reading EXIF is treated as "no metadata"
    # instead of aborting the whole classification.
    read_exif = getattr(img, "_getexif", None)
    try:
        exif = (read_exif() if callable(read_exif) else None) or {}
    except Exception:
        exif = {}

    decoded = {}
    for tag_id, value in exif.items():
        tag_name = TAGS.get(tag_id, str(tag_id))
        try:
            decoded[tag_name] = str(value)[:200]
        except Exception:
            # A value that cannot be stringified is skipped, not fatal.
            continue

    has_make = "Make" in decoded
    has_model = "Model" in decoded
    has_lens = "LensModel" in decoded or "LensInfo" in decoded
    has_focal = "FocalLength" in decoded
    source_format = getattr(img, "format", None)

    cam_fields = sum([has_make, has_model, has_lens, has_focal])
    indicators["exif_camera_fields"] = cam_fields
    indicators["has_exif"] = bool(decoded)
    indicators["format"] = source_format

    # Rich EXIF with lens info -> DSLR
    if cam_fields >= 3 and has_lens:
        add_evidence("DSLR", 0.4)

    # Camera make is a phone brand
    phone_brands = ["apple", "samsung", "google", "pixel", "huawei", "xiaomi", "oneplus",
                    "oppo", "vivo", "realme", "motorola", "lg", "sony xperia", "nothing"]
    make = decoded.get("Make", "").lower()
    model = decoded.get("Model", "").lower()
    if any(b in make or b in model for b in phone_brands):
        add_evidence("SMARTPHONE", 0.5)
        indicators["phone_brand"] = True

    # No EXIF at all -> messaging/social or AI
    if not decoded:
        add_evidence("MESSAGING", 0.3)
        add_evidence("SOCIAL_MEDIA", 0.2)
        indicators["no_exif"] = True

    # -- 2. Resolution analysis ----------------------------------------------
    mp = w * h / 1e6
    indicators["megapixels"] = round(mp, 2)

    # Common messaging app resolutions (WhatsApp compresses to ~1600px max side)
    max_side = max(w, h)
    if max_side <= 1600 and mp < 3:
        add_evidence("MESSAGING", 0.25)
        indicators["low_res"] = True

    # Screenshot-like aspect ratios (phone screens)
    ratio = max_side / min(w, h)
    if ratio > 1.9 and max_side > 1000:  # Tall phone screenshots
        add_evidence("SCREENSHOT", 0.3)
        indicators["tall_ratio"] = round(ratio, 2)

    # Standard phone ratios: 4:3 or 16:9
    if abs(ratio - 4 / 3) < 0.05 or abs(ratio - 16 / 9) < 0.05:
        add_evidence("SMARTPHONE", 0.1)

    # -- 3. Portrait mode detection (computational bokeh) ----------------------
    gray = np.array(img.convert("L")).astype(np.float64)

    # Local sharpness map: smoothed magnitude of the Laplacian response.
    lap = np.array([[0, 1, 0], [1, -4, 1], [0, 1, 0]], dtype=np.float64)
    laplacian = convolve2d(gray, lap, mode="same", boundary="symm")
    sharpness = gaussian_filter(np.abs(laplacian), sigma=10)

    # Portrait mode signature: sharp foreground + uniformly blurred background.
    sharp_region = sharpness > np.percentile(sharpness, 75)
    blur_region = sharpness < np.percentile(sharpness, 25)

    sharp_fraction = float(np.mean(sharp_region))
    blur_fraction = float(np.mean(blur_region))

    # Uniformity of the blurred region: 1 - coefficient of variation, floored at 0.
    blur_values = sharpness[blur_region] if np.any(blur_region) else np.array([0])
    blur_uniformity = 1.0 - min(
        float(np.std(blur_values)) / (float(np.mean(blur_values)) + 1e-9), 1.0
    )

    indicators["sharp_fraction"] = round(sharp_fraction, 3)
    indicators["blur_fraction"] = round(blur_fraction, 3)
    indicators["blur_uniformity"] = round(blur_uniformity, 3)

    # NOTE(review): blur_region is "below the 25th percentile", so for images of
    # realistic size blur_fraction cannot exceed roughly 0.25 and the `> 0.3`
    # gate below is effectively unreachable. Thresholds are left as-is because
    # the intended definition of the blurred region needs to be confirmed.
    if blur_fraction > 0.3 and blur_uniformity > 0.6 and sharp_fraction > 0.15:
        add_evidence("PORTRAIT_MODE", 0.5)
        indicators["portrait_mode_signature"] = True

    # -- 4. Screenshot detection -----------------------------------------------
    # Screenshots are dominated by axis-aligned edges (UI elements).
    gx = sobel(gray, axis=1)
    gy = sobel(gray, axis=0)
    edge_mag = np.hypot(gy, gx)
    strong_edges = edge_mag > np.percentile(edge_mag, 95)

    # A pixel is axis-aligned when one gradient component dominates the other 3:1.
    x_dominant = np.abs(gx) > np.abs(gy) * 3
    y_dominant = np.abs(gy) > np.abs(gx) * 3

    # Only strong-edge pixels are counted in the numerator so the value is the
    # fraction of strong edges that are axis-aligned (0..1). Counting every
    # pixel while dividing by the strong-edge count would inflate the ratio far
    # above 1 and make the 0.6 threshold meaningless.
    axis_aligned = (x_dominant | y_dominant) & strong_edges
    hv_ratio = float(np.sum(axis_aligned)) / (float(np.sum(strong_edges)) + 1e-9)

    if hv_ratio > 0.6:
        add_evidence("SCREENSHOT", 0.3)
    indicators["hv_edge_ratio"] = round(hv_ratio, 3)

    # -- 5. Double JPEG / messaging detection ----------------------------------
    # Compare the mean absolute row-to-row difference at every 8th row
    # boundary with the same measure at all other row transitions.
    hc, wc = (gray.shape[0] // 8) * 8, (gray.shape[1] // 8) * 8
    if hc > 16 and wc > 16:
        row_step = np.mean(np.abs(np.diff(gray[:hc, :wc], axis=0)), axis=1)
        # row_step[k] is the transition into row k + 1.
        on_boundary = (np.arange(1, hc) % 8) == 0
        boundary_steps = row_step[on_boundary]
        interior_steps = row_step[~on_boundary]
        if boundary_steps.size and interior_steps.size:
            blockiness = float(np.mean(boundary_steps)) / (float(np.mean(interior_steps)) + 1e-9)
            if blockiness > 1.3:
                add_evidence("MESSAGING", 0.2)
            indicators["double_jpeg"] = round(blockiness, 3)

    # -- 6. Determine primary modality -----------------------------------------
    if not scores:
        modality = "UNKNOWN"
        confidence = 0.2
    else:
        modality = max(scores, key=scores.get)
        confidence = min(1.0, scores[modality])

    # Override: if portrait mode + smartphone, portrait mode wins
    if scores.get("PORTRAIT_MODE", 0) > 0.3 and scores.get("SMARTPHONE", 0) > 0:
        modality = "PORTRAIT_MODE"
        confidence = min(1.0, scores["PORTRAIT_MODE"] + scores["SMARTPHONE"] * 0.3)

    # -- 7. Build score adjustments per modality -------------------------------
    adjustments = _get_modality_adjustments(modality)

    return ModalityResult(
        modality=modality,
        confidence=round(confidence, 3),
        indicators=indicators,
        score_adjustments=adjustments,
    )
196
+
197
+
198
+ def _get_modality_adjustments(modality: str) -> dict:
199
+ """
200
+ Return per-test score multipliers for known false-positive patterns.
201
+ 1.0 = no change, 0.0 = suppress entirely, 0.5 = halve the score.
202
+ """
203
+
204
+ if modality == "PORTRAIT_MODE":
205
+ return {
206
+ # These tests false-positive on computational bokeh
207
+ "Autocorrelation Peak": 0.1, # Bokeh creates periodic patterns
208
+ "Texture Repetition": 0.1, # Bokeh is repetitive by design
209
+ "VAE Patch Boundaries": 0.2, # Segmentation mask operates in blocks
210
+ "PRNU Uniformity": 0.15, # Dual-region noise (sharp vs blur)
211
+ "Poisson-Gaussian Model": 0.3, # Noise model breaks with synthetic blur
212
+ "DoF Consistency": 0.2, # Abrupt transitions are EXPECTED
213
+ "Vignetting cos⁴θ": 0.3, # Smartphones don't follow cos⁴θ
214
+ "HF Noise Structure": 0.3, # Blur region has different noise
215
+ "Noise Spatial Frequency": 0.3, # Same reason
216
+ "CFA Nyquist": 0.5, # Computational processing removes CFA
217
+ }
218
+
219
+ elif modality == "MESSAGING":
220
+ return {
221
+ # These tests false-positive on messaging compression
222
+ "EXIF Completeness": 0.15, # WhatsApp strips ALL EXIF β€” this is normal
223
+ "Compression Ghosts": 0.2, # Double JPEG is expected
224
+ "ICC Color Profile": 0.2, # Stripped by messaging apps
225
+ "Maker Note": 0.2, # Stripped
226
+ "Thumbnail Check": 0.2, # Stripped
227
+ "Software Detection": 0.2, # Stripped
228
+ "JPEG Quantization": 0.3, # Re-encoded with generic tables
229
+ "CFA Nyquist": 0.5, # Re-encoding destroys CFA traces
230
+ }
231
+
232
+ elif modality == "SOCIAL_MEDIA":
233
+ return {
234
+ "EXIF Completeness": 0.2,
235
+ "Compression Ghosts": 0.3,
236
+ "ICC Color Profile": 0.3,
237
+ "Maker Note": 0.2,
238
+ "Thumbnail Check": 0.3,
239
+ }
240
+
241
+ elif modality == "SCREENSHOT":
242
+ return {
243
+ # Screenshots are NOT photos β€” most optical/sensor tests are meaningless
244
+ "Vignetting cos⁴θ": 0.1,
245
+ "Vignetting Symmetry": 0.1,
246
+ "Lens Distortion": 0.1,
247
+ "Field Curvature": 0.1,
248
+ "CA Magnitude": 0.1,
249
+ "CA Radial Gradient": 0.1,
250
+ "Lateral CA": 0.1,
251
+ "Purple Fringing": 0.1,
252
+ "Bokeh Shape": 0.1,
253
+ "PRNU Uniformity": 0.1,
254
+ "Bayer CFA Pattern": 0.1,
255
+ "CFA Nyquist": 0.1,
256
+ "Hot/Dead Pixels": 0.1,
257
+ "Noise Autocorrelation": 0.1,
258
+ "Demosaic Interpolation": 0.1,
259
+ }
260
+
261
+ elif modality == "SMARTPHONE":
262
+ return {
263
+ # Smartphones use computational photography β€” mild suppression
264
+ "Vignetting cos⁴θ": 0.5, # Computational correction
265
+ "CFA Nyquist": 0.7, # Heavy ISP processing
266
+ "Poisson-Gaussian Model": 0.7, # Noise reduction
267
+ }
268
+
269
+ else: # DSLR or UNKNOWN
270
+ return {} # No adjustments β€” full scoring