Upload agents/modality_detector.py with huggingface_hub
Browse files- agents/modality_detector.py +92 -5
agents/modality_detector.py
CHANGED
|
@@ -113,6 +113,51 @@ def detect_modality(img: Image.Image) -> ModalityResult:
|
|
| 113 |
scores["PORTRAIT_MODE"] = portrait_score
|
| 114 |
indicators["portrait_detected"] = True
|
| 115 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
# ── Screenshot detection ──────────────────────────────────────────
|
| 117 |
edge_mag = np.hypot(sobel(gray, 0), sobel(gray, 1))
|
| 118 |
strong = edge_mag > np.percentile(edge_mag, 95)
|
|
@@ -192,8 +237,12 @@ def detect_modality(img: Image.Image) -> ModalityResult:
|
|
| 192 |
modality = max(scores, key=scores.get)
|
| 193 |
conf = min(1.0, scores[modality])
|
| 194 |
|
| 195 |
-
#
|
| 196 |
-
if scores.get("
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
modality = "PORTRAIT_MODE"
|
| 198 |
conf = min(1.0, scores["PORTRAIT_MODE"])
|
| 199 |
|
|
@@ -204,12 +253,17 @@ def detect_modality(img: Image.Image) -> ModalityResult:
|
|
| 204 |
indicators["safety_override"] = "Low-detail image β suppression disabled"
|
| 205 |
|
| 206 |
# SAFETY GUARD 2: No Bayer pattern = not from a real camera sensor.
|
| 207 |
-
#
|
| 208 |
-
#
|
| 209 |
if not has_bayer and modality in ("PORTRAIT_MODE", "SMARTPHONE", "MESSAGING"):
|
| 210 |
modality = "UNKNOWN"
|
| 211 |
conf = 0.2
|
| 212 |
indicators["safety_override"] = f"No Bayer CFA pattern (margin={bayer_margin:.3f}) β not from a real camera sensor. All suppression disabled."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 213 |
|
| 214 |
# ─── BUILD ADJUSTMENTS ────────────────────────────────────────────
|
| 215 |
|
|
@@ -222,13 +276,46 @@ def detect_modality(img: Image.Image) -> ModalityResult:
|
|
| 222 |
adjustments[k] = min(adjustments.get(k, 1.0), v)
|
| 223 |
indicators["dual_modality"] = "PORTRAIT_MODE + MESSAGING"
|
| 224 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 225 |
indicators["modality_scores"] = {k: round(v, 3) for k, v in scores.items()}
|
| 226 |
|
| 227 |
return ModalityResult(modality, round(conf, 3), indicators, adjustments)
|
| 228 |
|
| 229 |
|
| 230 |
def _get_modality_adjustments(modality: str) -> dict:
|
| 231 |
-
if modality == "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 232 |
return {
|
| 233 |
"Autocorrelation Peak": 0.1,
|
| 234 |
"Texture Repetition": 0.1,
|
|
|
|
| 113 |
scores["PORTRAIT_MODE"] = portrait_score
|
| 114 |
indicators["portrait_detected"] = True
|
| 115 |
|
| 116 |
+
# ── Macro/DSLR shallow DoF detection ──────────────────────────────
|
| 117 |
+
# Macro photos have: extreme sharpness ratio (center vs edge), very high
|
| 118 |
+
# bimodal ratio, large uniform blur region, and Bayer pattern present.
|
| 119 |
+
# Key difference from portrait mode: sharpness ratio is much higher (>5)
|
| 120 |
+
# because macro lenses produce more extreme DoF than phone portrait mode.
|
| 121 |
+
|
| 122 |
+
# Compute center-vs-edge sharpness ratio
|
| 123 |
+
ch, cw = gray.shape
|
| 124 |
+
center_region = sharpness[ch//4:3*ch//4, cw//4:3*cw//4]
|
| 125 |
+
edge_region = np.concatenate([
|
| 126 |
+
sharpness[:ch//4, :].ravel(),
|
| 127 |
+
sharpness[3*ch//4:, :].ravel(),
|
| 128 |
+
sharpness[:, :cw//4].ravel(),
|
| 129 |
+
sharpness[:, 3*cw//4:].ravel(),
|
| 130 |
+
])
|
| 131 |
+
center_sharp = float(np.percentile(center_region, 90))
|
| 132 |
+
edge_sharp = float(np.mean(edge_region))
|
| 133 |
+
sharpness_ratio = center_sharp / (edge_sharp + 1e-9)
|
| 134 |
+
|
| 135 |
+
# Background uniformity: how uniform is the blurred region's color?
|
| 136 |
+
blur_region_pixels = gray[blur_region] if np.any(blur_region) else np.array([128])
|
| 137 |
+
bg_color_std = float(np.std(blur_region_pixels))
|
| 138 |
+
|
| 139 |
+
indicators["sharpness_ratio"] = round(sharpness_ratio, 2)
|
| 140 |
+
indicators["bg_color_std"] = round(bg_color_std, 2)
|
| 141 |
+
|
| 142 |
+
macro_score = 0.0
|
| 143 |
+
if has_detail and sharpness_ratio > 3.0:
|
| 144 |
+
macro_score += 0.25 # Extreme center/edge sharpness difference
|
| 145 |
+
if has_detail and blur_frac > 0.35:
|
| 146 |
+
macro_score += 0.2 # Large blur region
|
| 147 |
+
if has_detail and bimodal_ratio > 1.5:
|
| 148 |
+
macro_score += 0.2 # Strong bimodal sharpness
|
| 149 |
+
if has_detail and bg_color_std < 40:
|
| 150 |
+
macro_score += 0.15 # Uniform background color (bokeh'd)
|
| 151 |
+
|
| 152 |
+
# Macro requires Bayer OR high EXIF evidence (Unsplash strips some Bayer)
|
| 153 |
+
# Allow macro without Bayer if other signals are very strong
|
| 154 |
+
if macro_score >= 0.6:
|
| 155 |
+
scores["MACRO_DSLR"] = macro_score
|
| 156 |
+
indicators["macro_detected"] = True
|
| 157 |
+
elif macro_score >= 0.4 and has_bayer:
|
| 158 |
+
scores["MACRO_DSLR"] = macro_score
|
| 159 |
+
indicators["macro_detected"] = True
|
| 160 |
+
|
| 161 |
# ── Screenshot detection ──────────────────────────────────────────
|
| 162 |
edge_mag = np.hypot(sobel(gray, 0), sobel(gray, 1))
|
| 163 |
strong = edge_mag > np.percentile(edge_mag, 95)
|
|
|
|
| 237 |
modality = max(scores, key=scores.get)
|
| 238 |
conf = min(1.0, scores[modality])
|
| 239 |
|
| 240 |
+
# Macro DSLR wins over portrait mode when detected (more specific)
|
| 241 |
+
if scores.get("MACRO_DSLR", 0) >= 0.4:
|
| 242 |
+
modality = "MACRO_DSLR"
|
| 243 |
+
conf = min(1.0, scores["MACRO_DSLR"])
|
| 244 |
+
# Portrait mode wins when detected and no macro
|
| 245 |
+
elif scores.get("PORTRAIT_MODE", 0) > 0.3:
|
| 246 |
modality = "PORTRAIT_MODE"
|
| 247 |
conf = min(1.0, scores["PORTRAIT_MODE"])
|
| 248 |
|
|
|
|
| 253 |
indicators["safety_override"] = "Low-detail image β suppression disabled"
|
| 254 |
|
| 255 |
# SAFETY GUARD 2: No Bayer pattern = not from a real camera sensor.
|
| 256 |
+
# Exception: MACRO_DSLR with very strong signals can bypass this
|
| 257 |
+
# (Unsplash CDN processing can strip Bayer traces from real DSLR photos)
|
| 258 |
if not has_bayer and modality in ("PORTRAIT_MODE", "SMARTPHONE", "MESSAGING"):
|
| 259 |
modality = "UNKNOWN"
|
| 260 |
conf = 0.2
|
| 261 |
indicators["safety_override"] = f"No Bayer CFA pattern (margin={bayer_margin:.3f}) β not from a real camera sensor. All suppression disabled."
|
| 262 |
+
elif not has_bayer and modality == "MACRO_DSLR" and scores.get("MACRO_DSLR", 0) < 0.6:
|
| 263 |
+
# Weak macro signal without Bayer — don't trust it
|
| 264 |
+
modality = "UNKNOWN"
|
| 265 |
+
conf = 0.2
|
| 266 |
+
indicators["safety_override"] = f"Macro signal weak + no Bayer β suppression disabled"
|
| 267 |
|
| 268 |
# ─── BUILD ADJUSTMENTS ────────────────────────────────────────────
|
| 269 |
|
|
|
|
| 276 |
adjustments[k] = min(adjustments.get(k, 1.0), v)
|
| 277 |
indicators["dual_modality"] = "PORTRAIT_MODE + MESSAGING"
|
| 278 |
|
| 279 |
+
# Merge messaging adjustments when macro + messaging both detected
|
| 280 |
+
if modality == "MACRO_DSLR" and scores.get("MESSAGING", 0) > 0.15:
|
| 281 |
+
msg_adj = _get_modality_adjustments("MESSAGING")
|
| 282 |
+
for k, v in msg_adj.items():
|
| 283 |
+
adjustments[k] = min(adjustments.get(k, 1.0), v)
|
| 284 |
+
indicators["dual_modality"] = "MACRO_DSLR + MESSAGING"
|
| 285 |
+
|
| 286 |
indicators["modality_scores"] = {k: round(v, 3) for k, v in scores.items()}
|
| 287 |
|
| 288 |
return ModalityResult(modality, round(conf, 3), indicators, adjustments)
|
| 289 |
|
| 290 |
|
| 291 |
def _get_modality_adjustments(modality: str) -> dict:
|
| 292 |
+
if modality == "MACRO_DSLR":
|
| 293 |
+
return {
|
| 294 |
+
# Extreme shallow DoF creates uniform bokeh → high autocorrelation
|
| 295 |
+
"Autocorrelation Peak": 0.1,
|
| 296 |
+
"Texture Repetition": 0.1,
|
| 297 |
+
"DoF Consistency": 0.1,
|
| 298 |
+
# JPEG delivery pipeline removes sensor traces
|
| 299 |
+
"Bayer CFA Pattern": 0.3,
|
| 300 |
+
"CFA Nyquist": 0.3,
|
| 301 |
+
"PRNU Uniformity": 0.2,
|
| 302 |
+
"Demosaic Interpolation": 0.4,
|
| 303 |
+
# Bimodal content (sharp subject + smooth bokeh) creates extreme kurtosis
|
| 304 |
+
# This is physics, not AI sharpening. Suppress the ceiling trigger.
|
| 305 |
+
"DCT Kurtosis": 0.1,
|
| 306 |
+
"Wavelet Kurtosis": 0.1,
|
| 307 |
+
"Spectral Slope 1/fΒ²": 0.4,
|
| 308 |
+
"Spectral Symmetry": 0.3,
|
| 309 |
+
"Phase Coherence": 0.4,
|
| 310 |
+
# Noise inconsistency between sharp subject and bokeh
|
| 311 |
+
"Noise Spatial Frequency": 0.2,
|
| 312 |
+
"Poisson-Gaussian Model": 0.2,
|
| 313 |
+
"HF Noise Structure": 0.3,
|
| 314 |
+
"Pixel Response Linearity": 0.4,
|
| 315 |
+
"Saturation Clipping": 0.4,
|
| 316 |
+
"VAE Patch Boundaries": 0.3,
|
| 317 |
+
}
|
| 318 |
+
elif modality == "PORTRAIT_MODE":
|
| 319 |
return {
|
| 320 |
"Autocorrelation Peak": 0.1,
|
| 321 |
"Texture Repetition": 0.1,
|