Upload agents/modality_detector.py with huggingface_hub
Browse files- agents/modality_detector.py +32 -13
agents/modality_detector.py
CHANGED
|
@@ -124,17 +124,21 @@ def detect_modality(img: Image.Image) -> ModalityResult:
|
|
| 124 |
sharp_fraction = float(np.mean(sharp_region))
|
| 125 |
blur_fraction = float(np.mean(blur_region))
|
| 126 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
# Check if blur is very uniform (computational vs optical)
|
| 128 |
blur_values = sharpness[blur_region] if np.any(blur_region) else np.array([0])
|
| 129 |
blur_uniformity = 1.0 - min(float(np.std(blur_values)) / (float(np.mean(blur_values)) + 1e-9), 1.0)
|
| 130 |
|
| 131 |
-
# Check transition abruptness
|
| 132 |
-
# Compute gradient of sharpness map
|
| 133 |
sharpness_grad = np.hypot(
|
| 134 |
sobel(sharpness, axis=0),
|
| 135 |
sobel(sharpness, axis=1)
|
| 136 |
)
|
| 137 |
-
# High max gradient at the transition = abrupt = computational
|
| 138 |
max_transition = float(np.percentile(sharpness_grad, 99))
|
| 139 |
mean_transition = float(np.mean(sharpness_grad))
|
| 140 |
transition_abruptness = max_transition / (mean_transition + 1e-9)
|
|
@@ -143,24 +147,33 @@ def detect_modality(img: Image.Image) -> ModalityResult:
|
|
| 143 |
indicators["blur_fraction"] = round(blur_fraction, 3)
|
| 144 |
indicators["blur_uniformity"] = round(blur_uniformity, 3)
|
| 145 |
indicators["transition_abruptness"] = round(transition_abruptness, 3)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
|
| 147 |
-
# Portrait mode detection — relaxed thresholds + multiple signals
|
| 148 |
portrait_signals = 0
|
| 149 |
-
if blur_fraction > 0.2 and sharp_fraction > 0.1:
|
| 150 |
portrait_signals += 1
|
| 151 |
-
if blur_uniformity > 0.5:
|
| 152 |
portrait_signals += 1
|
| 153 |
-
if transition_abruptness > 5
|
| 154 |
portrait_signals += 1
|
| 155 |
|
| 156 |
-
# Strong portrait mode: at least 2 of 3 signals
|
| 157 |
-
if portrait_signals >= 2:
|
| 158 |
scores["PORTRAIT_MODE"] = scores.get("PORTRAIT_MODE", 0) + 0.3 * portrait_signals
|
| 159 |
indicators["portrait_mode_signature"] = True
|
| 160 |
-
elif portrait_signals == 1 and blur_fraction > 0.25:
|
| 161 |
scores["PORTRAIT_MODE"] = scores.get("PORTRAIT_MODE", 0) + 0.2
|
| 162 |
indicators["portrait_mode_weak"] = True
|
| 163 |
|
|
|
|
|
|
|
|
|
|
| 164 |
# ── 4. Screenshot detection ───────────────────────────────────────
|
| 165 |
# Screenshots have: perfect pixel edges, UI elements, uniform background areas
|
| 166 |
edge_mag = np.hypot(sobel(gray, 0), sobel(gray, 1))
|
|
@@ -201,12 +214,18 @@ def detect_modality(img: Image.Image) -> ModalityResult:
|
|
| 201 |
confidence = min(1.0, scores[modality])
|
| 202 |
|
| 203 |
# Override: portrait mode wins over messaging/smartphone when detected
|
| 204 |
-
# (a portrait photo sent via WhatsApp should get PORTRAIT_MODE adjustments,
|
| 205 |
-
# which are a superset of MESSAGING adjustments for the tests that matter)
|
| 206 |
if scores.get("PORTRAIT_MODE", 0) > 0.2:
|
| 207 |
modality = "PORTRAIT_MODE"
|
| 208 |
confidence = min(1.0, scores["PORTRAIT_MODE"])
|
| 209 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
# ── 7. Build score adjustments ────────────────────────────────────
|
| 211 |
# Merge adjustments when multiple modalities detected
|
| 212 |
# (e.g., portrait mode photo sent via messaging app gets BOTH sets)
|
|
@@ -249,7 +268,7 @@ def _get_modality_adjustments(modality: str) -> dict:
|
|
| 249 |
"Vignetting cos⁴θ": 0.3, # Smartphones don't follow cos⁴θ
|
| 250 |
"HF Noise Structure": 0.3, # Blur region has different noise
|
| 251 |
"Noise Spatial Frequency": 0.3, # Same reason
|
| 252 |
-
"CFA Nyquist": 0.
|
| 253 |
# Spectral tests affected by computational photography
|
| 254 |
"Spectral Slope 1/f²": 0.5, # Frequency-selective sharpening steepens slope
|
| 255 |
"Spectral Symmetry": 0.4, # Depth-based processing creates asymmetry
|
|
|
|
| 124 |
sharp_fraction = float(np.mean(sharp_region))
|
| 125 |
blur_fraction = float(np.mean(blur_region))
|
| 126 |
|
| 127 |
+
# CRITICAL: Check absolute sharpness level, not just relative
|
| 128 |
+
# Real portrait photos have genuinely sharp foreground (textures, edges, pores)
|
| 129 |
+
# AI images are smooth everywhere — even the "sharp" region is soft
|
| 130 |
+
peak_sharpness = float(np.percentile(sharpness, 95))
|
| 131 |
+
median_sharpness = float(np.median(sharpness))
|
| 132 |
+
|
| 133 |
# Check if blur is very uniform (computational vs optical)
|
| 134 |
blur_values = sharpness[blur_region] if np.any(blur_region) else np.array([0])
|
| 135 |
blur_uniformity = 1.0 - min(float(np.std(blur_values)) / (float(np.mean(blur_values)) + 1e-9), 1.0)
|
| 136 |
|
| 137 |
+
# Check transition abruptness
|
|
|
|
| 138 |
sharpness_grad = np.hypot(
|
| 139 |
sobel(sharpness, axis=0),
|
| 140 |
sobel(sharpness, axis=1)
|
| 141 |
)
|
|
|
|
| 142 |
max_transition = float(np.percentile(sharpness_grad, 99))
|
| 143 |
mean_transition = float(np.mean(sharpness_grad))
|
| 144 |
transition_abruptness = max_transition / (mean_transition + 1e-9)
|
|
|
|
| 147 |
indicators["blur_fraction"] = round(blur_fraction, 3)
|
| 148 |
indicators["blur_uniformity"] = round(blur_uniformity, 3)
|
| 149 |
indicators["transition_abruptness"] = round(transition_abruptness, 3)
|
| 150 |
+
indicators["peak_sharpness"] = round(peak_sharpness, 2)
|
| 151 |
+
indicators["median_sharpness"] = round(median_sharpness, 2)
|
| 152 |
+
|
| 153 |
+
# Portrait mode detection — requires BOTH relative and absolute sharpness
|
| 154 |
+
# A real portrait photo has genuinely sharp foreground detail
|
| 155 |
+
# An AI-generated smooth image has low peak sharpness even if center > edges
|
| 156 |
+
has_genuine_detail = peak_sharpness > 10.0 # Real photos have Laplacian variance > 10
|
| 157 |
|
|
|
|
| 158 |
portrait_signals = 0
|
| 159 |
+
if blur_fraction > 0.2 and sharp_fraction > 0.1 and has_genuine_detail:
|
| 160 |
portrait_signals += 1
|
| 161 |
+
if blur_uniformity > 0.5 and has_genuine_detail:
|
| 162 |
portrait_signals += 1
|
| 163 |
+
if transition_abruptness > 5 and has_genuine_detail:
|
| 164 |
portrait_signals += 1
|
| 165 |
|
| 166 |
+
# Strong portrait mode: at least 2 of 3 signals AND genuine foreground detail
|
| 167 |
+
if portrait_signals >= 2 and has_genuine_detail:
|
| 168 |
scores["PORTRAIT_MODE"] = scores.get("PORTRAIT_MODE", 0) + 0.3 * portrait_signals
|
| 169 |
indicators["portrait_mode_signature"] = True
|
| 170 |
+
elif portrait_signals == 1 and blur_fraction > 0.25 and has_genuine_detail:
|
| 171 |
scores["PORTRAIT_MODE"] = scores.get("PORTRAIT_MODE", 0) + 0.2
|
| 172 |
indicators["portrait_mode_weak"] = True
|
| 173 |
|
| 174 |
+
if not has_genuine_detail:
|
| 175 |
+
indicators["low_detail_image"] = True # Smooth everywhere = possible AI
|
| 176 |
+
|
| 177 |
# ── 4. Screenshot detection ───────────────────────────────────────
|
| 178 |
# Screenshots have: perfect pixel edges, UI elements, uniform background areas
|
| 179 |
edge_mag = np.hypot(sobel(gray, 0), sobel(gray, 1))
|
|
|
|
| 214 |
confidence = min(1.0, scores[modality])
|
| 215 |
|
| 216 |
# Override: portrait mode wins over messaging/smartphone when detected
|
|
|
|
|
|
|
| 217 |
if scores.get("PORTRAIT_MODE", 0) > 0.2:
|
| 218 |
modality = "PORTRAIT_MODE"
|
| 219 |
confidence = min(1.0, scores["PORTRAIT_MODE"])
|
| 220 |
|
| 221 |
+
# SAFETY GUARD: If image has no genuine photographic detail (low peak sharpness),
|
| 222 |
+
# it's likely AI-generated, not a real camera photo. In this case, do NOT apply
|
| 223 |
+
# any modality suppression β let all forensic tests fire at full strength.
|
| 224 |
+
if indicators.get("low_detail_image", False):
|
| 225 |
+
modality = "UNKNOWN"
|
| 226 |
+
confidence = 0.2
|
| 227 |
+
indicators["modality_override"] = "Low-detail image — suppression disabled to avoid protecting AI content"
|
| 228 |
+
|
| 229 |
# ── 7. Build score adjustments ────────────────────────────────────
|
| 230 |
# Merge adjustments when multiple modalities detected
|
| 231 |
# (e.g., portrait mode photo sent via messaging app gets BOTH sets)
|
|
|
|
| 268 |
"Vignetting cos⁴θ": 0.3, # Smartphones don't follow cos⁴θ
|
| 269 |
"HF Noise Structure": 0.3, # Blur region has different noise
|
| 270 |
"Noise Spatial Frequency": 0.3, # Same reason
|
| 271 |
+
"CFA Nyquist": 0.25, # Computational processing destroys CFA traces entirely
|
| 272 |
# Spectral tests affected by computational photography
|
| 273 |
"Spectral Slope 1/f²": 0.5, # Frequency-selective sharpening steepens slope
|
| 274 |
"Spectral Symmetry": 0.4, # Depth-based processing creates asymmetry
|