Upload agents/modality_detector.py with huggingface_hub
Browse files- agents/modality_detector.py +47 -11
agents/modality_detector.py
CHANGED
|
@@ -121,9 +121,6 @@ def detect_modality(img: Image.Image) -> ModalityResult:
|
|
| 121 |
sharp_region = sharpness > sharp_thresh
|
| 122 |
blur_region = sharpness < blur_thresh
|
| 123 |
|
| 124 |
-
# Compute transition sharpness
|
| 125 |
-
# In portrait mode: boundary between sharp/blur is very steep
|
| 126 |
-
# In real DoF: boundary is gradual
|
| 127 |
sharp_fraction = float(np.mean(sharp_region))
|
| 128 |
blur_fraction = float(np.mean(blur_region))
|
| 129 |
|
|
@@ -131,14 +128,38 @@ def detect_modality(img: Image.Image) -> ModalityResult:
|
|
| 131 |
blur_values = sharpness[blur_region] if np.any(blur_region) else np.array([0])
|
| 132 |
blur_uniformity = 1.0 - min(float(np.std(blur_values)) / (float(np.mean(blur_values)) + 1e-9), 1.0)
|
| 133 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
indicators["sharp_fraction"] = round(sharp_fraction, 3)
|
| 135 |
indicators["blur_fraction"] = round(blur_fraction, 3)
|
| 136 |
indicators["blur_uniformity"] = round(blur_uniformity, 3)
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
indicators["portrait_mode_signature"] = True
|
|
|
|
|
|
|
|
|
|
| 142 |
|
| 143 |
# ── 4. Screenshot detection ───────────────────────────────────────
|
| 144 |
# Screenshots have: perfect pixel edges, UI elements, uniform background areas
|
|
@@ -179,14 +200,29 @@ def detect_modality(img: Image.Image) -> ModalityResult:
|
|
| 179 |
modality = max(scores, key=scores.get)
|
| 180 |
confidence = min(1.0, scores[modality])
|
| 181 |
|
| 182 |
-
# Override:
|
| 183 |
-
|
|
|
|
|
|
|
| 184 |
modality = "PORTRAIT_MODE"
|
| 185 |
-
confidence = min(1.0, scores["PORTRAIT_MODE"]
|
| 186 |
|
| 187 |
-
# ── 7. Build score adjustments
|
|
|
|
|
|
|
| 188 |
adjustments = _get_modality_adjustments(modality)
|
| 189 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
return ModalityResult(
|
| 191 |
modality=modality,
|
| 192 |
confidence=round(confidence, 3),
|
|
|
|
| 121 |
sharp_region = sharpness > sharp_thresh
|
| 122 |
blur_region = sharpness < blur_thresh
|
| 123 |
|
|
|
|
|
|
|
|
|
|
| 124 |
sharp_fraction = float(np.mean(sharp_region))
|
| 125 |
blur_fraction = float(np.mean(blur_region))
|
| 126 |
|
|
|
|
| 128 |
blur_values = sharpness[blur_region] if np.any(blur_region) else np.array([0])
|
| 129 |
blur_uniformity = 1.0 - min(float(np.std(blur_values)) / (float(np.mean(blur_values)) + 1e-9), 1.0)
|
| 130 |
|
| 131 |
+
# Check transition abruptness: in portrait mode the edge between sharp/blur is steep
|
| 132 |
+
# Compute gradient of sharpness map
|
| 133 |
+
sharpness_grad = np.hypot(
|
| 134 |
+
sobel(sharpness, axis=0),
|
| 135 |
+
sobel(sharpness, axis=1)
|
| 136 |
+
)
|
| 137 |
+
# High max gradient at the transition = abrupt = computational
|
| 138 |
+
max_transition = float(np.percentile(sharpness_grad, 99))
|
| 139 |
+
mean_transition = float(np.mean(sharpness_grad))
|
| 140 |
+
transition_abruptness = max_transition / (mean_transition + 1e-9)
|
| 141 |
+
|
| 142 |
indicators["sharp_fraction"] = round(sharp_fraction, 3)
|
| 143 |
indicators["blur_fraction"] = round(blur_fraction, 3)
|
| 144 |
indicators["blur_uniformity"] = round(blur_uniformity, 3)
|
| 145 |
+
indicators["transition_abruptness"] = round(transition_abruptness, 3)
|
| 146 |
+
|
| 147 |
+
# Portrait mode detection — relaxed thresholds + multiple signals
|
| 148 |
+
portrait_signals = 0
|
| 149 |
+
if blur_fraction > 0.2 and sharp_fraction > 0.1:
|
| 150 |
+
portrait_signals += 1
|
| 151 |
+
if blur_uniformity > 0.5:
|
| 152 |
+
portrait_signals += 1
|
| 153 |
+
if transition_abruptness > 5: # Abrupt edge = computational segmentation
|
| 154 |
+
portrait_signals += 1
|
| 155 |
+
|
| 156 |
+
# Strong portrait mode: at least 2 of 3 signals
|
| 157 |
+
if portrait_signals >= 2:
|
| 158 |
+
scores["PORTRAIT_MODE"] = scores.get("PORTRAIT_MODE", 0) + 0.3 * portrait_signals
|
| 159 |
indicators["portrait_mode_signature"] = True
|
| 160 |
+
elif portrait_signals == 1 and blur_fraction > 0.25:
|
| 161 |
+
scores["PORTRAIT_MODE"] = scores.get("PORTRAIT_MODE", 0) + 0.2
|
| 162 |
+
indicators["portrait_mode_weak"] = True
|
| 163 |
|
| 164 |
# ── 4. Screenshot detection ───────────────────────────────────────
|
| 165 |
# Screenshots have: perfect pixel edges, UI elements, uniform background areas
|
|
|
|
| 200 |
modality = max(scores, key=scores.get)
|
| 201 |
confidence = min(1.0, scores[modality])
|
| 202 |
|
| 203 |
+
# Override: portrait mode wins over messaging/smartphone when detected
|
| 204 |
+
# (a portrait photo sent via WhatsApp should get PORTRAIT_MODE adjustments,
|
| 205 |
+
# which are a superset of MESSAGING adjustments for the tests that matter)
|
| 206 |
+
if scores.get("PORTRAIT_MODE", 0) > 0.2:
|
| 207 |
modality = "PORTRAIT_MODE"
|
| 208 |
+
confidence = min(1.0, scores["PORTRAIT_MODE"])
|
| 209 |
|
| 210 |
+
# ── 7. Build score adjustments ────────────────────────────────────
|
| 211 |
+
# Merge adjustments when multiple modalities detected
|
| 212 |
+
# (e.g., portrait mode photo sent via messaging app gets BOTH sets)
|
| 213 |
adjustments = _get_modality_adjustments(modality)
|
| 214 |
|
| 215 |
+
# If messaging signals are present alongside portrait mode, merge messaging adjustments
|
| 216 |
+
if modality == "PORTRAIT_MODE" and scores.get("MESSAGING", 0) > 0.2:
|
| 217 |
+
messaging_adj = _get_modality_adjustments("MESSAGING")
|
| 218 |
+
for test_name, multiplier in messaging_adj.items():
|
| 219 |
+
if test_name not in adjustments:
|
| 220 |
+
adjustments[test_name] = multiplier
|
| 221 |
+
else:
|
| 222 |
+
# Take the more suppressive (lower) multiplier
|
| 223 |
+
adjustments[test_name] = min(adjustments[test_name], multiplier)
|
| 224 |
+
indicators["dual_modality"] = "PORTRAIT_MODE + MESSAGING"
|
| 225 |
+
|
| 226 |
return ModalityResult(
|
| 227 |
modality=modality,
|
| 228 |
confidence=round(confidence, 3),
|