Spaces:

anky2002
/

FORENSIQ

Running

App Files Files Community

anky2002 committed 14 days ago

Commit

390f4c5

verified ·

1 Parent(s): 4d66672

Upload agents/semantic_agent.py with huggingface_hub

Browse files

Files changed (1) hide show

agents/semantic_agent.py +99 -17

agents/semantic_agent.py CHANGED Viewed

@@ -226,7 +226,12 @@ Your 8 analysis domains:
 2. TIME OF DAY: Sky color/brightness must match shadow lengths and lighting direction. A bright blue sky requires short shadows (midday) or long shadows from a specific direction. Stars visible + brightly lit ground is contradictory.
-3. ERA / TECHNOLOGY ANACHRONISM: Visible technology (phones, cars, screens, signage style) should match the apparent era. A scene with 1950s architecture containing modern smartphones is suspicious. Fashion should match the apparent era of other objects.
 4. GEOGRAPHIC COHERENCE: Architecture style must match vegetation and climate. Tropical palm trees next to Northern European half-timbered houses is impossible. Road markings should match the apparent country (right-hand vs left-hand traffic, line styles). Visible text/signs should be in the expected language for the geography.
@@ -286,17 +291,22 @@ def _calibrate_vlm_confidence(raw_conf: float) -> float:
 def run_semantic_agent(img):
     findings, scores = [], []
     vlm_ok = True
-    for sys_p, usr_p, name, features in [
         (SYS_LIGHTING, USR_LIGHTING, "Lighting Physics",
          ["Shadow Direction","Shadow Quality","Specular Consistency","Ambient Occlusion",
-          "Color Temperature","Subsurface Scattering","Caustics","Inter-reflections"]),
         (SYS_ANATOMY, USR_ANATOMY, "Anatomical Analysis",
          ["Hand Anatomy","Facial Symmetry","Body Proportions","Skin Texture",
-          "Hair Consistency","Eye Details","Clothing Physics"]),
         (SYS_PHYSICS, USR_PHYSICS, "Physical Plausibility",
          ["Material Appearance","Perspective Geometry","Gravity & Structure",
-          "Scale & Proportion","Transparency","Contact Physics","Motion Coherence","Depth & Occlusion"]),
     ]:
         try:
             resp = _vlm(img, sys_p, usr_p)
@@ -304,18 +314,56 @@ def run_semantic_agent(img):
                 parsed = _parse(resp)
                 sc = _score(parsed)
-                # Calibrate VLM confidence before storing
                 raw_conf = parsed.get("confidence", 0.5)
                 cal_conf = _calibrate_vlm_confidence(raw_conf)
                 if name == "Anatomical Analysis" and not parsed.get("contains_people", True):
-                    sc = 0.0
                 anomalies = parsed.get("anomalies", [])
                 for feat in features:
-                    findings.append({"test": feat, "score": sc / len(features),
-                                   "note": parsed.get("explanation", "")[:100], "parent": name})
-                    scores.append(sc / len(features))
                 findings.append({"test": name, "vlm_analysis": parsed, "anomalies": anomalies,
                                "score": sc, "confidence": cal_conf,
@@ -323,15 +371,18 @@ def run_semantic_agent(img):
                                "calibrated_confidence": cal_conf,
                                "note": parsed.get("explanation", "")[:200]})
                 scores.append(sc)
             else:
                 vlm_ok = False
                 for feat in features:
                     findings.append({"test": feat, "score": 0.0, "note": "VLM unavailable", "vlm_error": True})
-                    scores.append(0.0)
         except Exception as e:
             findings.append({"test": name, "error": str(e), "score": 0})
-    # Context plausibility (expanded to 8 sub-features)
     try:
         resp = _vlm(img, SYS_CONTEXT, USR_CONTEXT)
         if resp and not resp.startswith("VLM_ERROR"):
@@ -347,27 +398,58 @@ def run_semantic_agent(img):
                 findings.append({"test": feat, "score": sc / len(context_features),
                                "note": parsed.get("explanation", "")[:100], "parent": "Context"})
                 scores.append(sc / len(context_features))
             findings.append({"test": "Context Plausibility", "vlm_analysis": parsed,
                            "score": sc, "confidence": cal_conf,
                            "note": parsed.get("explanation", "")[:200]})
             scores.append(sc)
         else:
             vlm_ok = False
     except:
         pass
-    avg = float(np.mean(scores)) if scores else 0.0
-    conf = min(1.0, 0.4 + 0.5 * abs(avg))
     if not vlm_ok:
         conf *= 0.3
-    viol = [f["test"] for f in findings if f.get("score", 0) > 0.15 and "parent" not in f]
-    comp = [f["test"] for f in findings if f.get("score", 0) < -0.1 and "parent" not in f]
     rat = f"Semantic violations: {', '.join(viol[:5])}." if viol else \
           f"Semantically consistent: {', '.join(comp[:5])}." if comp else "Semantic inconclusive."
     for f in findings:
-        if f.get("note") and "parent" not in f:
             rat += f" [{f['test']}]: {f['note'][:100]}."
     return AgentEvidence("Semantic Consistency Agent", np.clip(avg, -1, 1), conf,

 2. TIME OF DAY: Sky color/brightness must match shadow lengths and lighting direction. A bright blue sky requires short shadows (midday) or long shadows from a specific direction. Stars visible + brightly lit ground is contradictory.
+3. ERA / TECHNOLOGY ANACHRONISM: Visible technology must match the apparent era of other objects in the scene. Use these concrete anchors:
+   - Pre-1990: No flat-screen TVs, no smartphones, no LED lighting, no modern car designs (rounded headlights, DRLs). CRT monitors only. Wired phones only.
+   - 1990-2005: Flip phones and early Nokias OK, but no touchscreen smartphones. Boxy CRT monitors, not flat panels. Boxy car designs.
+   - 2005-2015: Early smartphones OK, but no notched/hole-punch screens. Flat panels exist but bezels are thick.
+   - Post-2015: Thin-bezel phones, wireless earbuds, USB-C cables, modern LED strip lighting.
+   If the scene mixes eras (1950s architecture + a person holding a modern iPhone), flag it. Fashion should match the era of other visible technology.
 4. GEOGRAPHIC COHERENCE: Architecture style must match vegetation and climate. Tropical palm trees next to Northern European half-timbered houses is impossible. Road markings should match the apparent country (right-hand vs left-hand traffic, line styles). Visible text/signs should be in the expected language for the geography.
 def run_semantic_agent(img):
     findings, scores = [], []
     vlm_ok = True
+    n_applicable = 0  # Track how many sub-features were actually applicable
+    n_total = 0       # Track total sub-features attempted
+    for sys_p, usr_p, name, features, null_fields in [
         (SYS_LIGHTING, USR_LIGHTING, "Lighting Physics",
          ["Shadow Direction","Shadow Quality","Specular Consistency","Ambient Occlusion",
+          "Color Temperature","Subsurface Scattering","Caustics","Inter-reflections"],
+         {"sss_correct", "caustics_correct", "interreflections_ok"}),
         (SYS_ANATOMY, USR_ANATOMY, "Anatomical Analysis",
          ["Hand Anatomy","Facial Symmetry","Body Proportions","Skin Texture",
+          "Hair Consistency","Eye Details","Clothing Physics"],
+         set()),
         (SYS_PHYSICS, USR_PHYSICS, "Physical Plausibility",
          ["Material Appearance","Perspective Geometry","Gravity & Structure",
+          "Scale & Proportion","Transparency","Contact Physics","Motion Coherence","Depth & Occlusion"],
+         {"transparency_ok", "motion_ok"}),
     ]:
         try:
             resp = _vlm(img, sys_p, usr_p)
                 parsed = _parse(resp)
                 sc = _score(parsed)
                 raw_conf = parsed.get("confidence", 0.5)
                 cal_conf = _calibrate_vlm_confidence(raw_conf)
+                # Fix 3: Anatomy on non-human images → tag as not_applicable
                 if name == "Anatomical Analysis" and not parsed.get("contains_people", True):
+                    for feat in features:
+                        findings.append({"test": feat, "score": 0.0,
+                                       "note": "No people in image — not applicable",
+                                       "not_applicable": True, "parent": name})
+                        # NOT added to scores — these should not dilute the posterior
+                    n_total += len(features)
+                    findings.append({"test": name, "vlm_analysis": parsed,
+                                   "score": 0.0, "confidence": cal_conf,
+                                   "not_applicable": True,
+                                   "note": "No people detected — anatomy analysis skipped"})
+                    continue
                 anomalies = parsed.get("anomalies", [])
+                # Fix 2: Count applicable sub-features (exclude nulls)
+                applicable_features = []
                 for feat in features:
+                    # Check if VLM returned null for the corresponding field
+                    field_map = {f: k for f, k in zip(features, parsed.keys()) if k in null_fields}
+                    is_null = False
+                    for nf in null_fields:
+                        if parsed.get(nf) is None:
+                            # Map null field back to feature name (approximate)
+                            if any(nf_word in feat.lower() for nf_word in nf.replace("_ok","").replace("_correct","").split("_")):
+                                is_null = True
+                                break
+                    if is_null:
+                        findings.append({"test": feat, "score": 0.0,
+                                       "note": "Not applicable to this image",
+                                       "not_applicable": True, "parent": name})
+                        n_total += 1
+                    else:
+                        applicable_features.append(feat)
+                # Distribute score only across applicable features
+                n_applicable_here = len(applicable_features)
+                if n_applicable_here > 0:
+                    per_feat_score = sc / n_applicable_here
+                    for feat in applicable_features:
+                        findings.append({"test": feat, "score": per_feat_score,
+                                       "note": parsed.get("explanation", "")[:100], "parent": name})
+                        scores.append(per_feat_score)
+                        n_applicable += 1
+                        n_total += 1
                 findings.append({"test": name, "vlm_analysis": parsed, "anomalies": anomalies,
                                "score": sc, "confidence": cal_conf,
                                "calibrated_confidence": cal_conf,
                                "note": parsed.get("explanation", "")[:200]})
                 scores.append(sc)
+                n_applicable += 1
+                n_total += 1
             else:
                 vlm_ok = False
                 for feat in features:
                     findings.append({"test": feat, "score": 0.0, "note": "VLM unavailable", "vlm_error": True})
+                n_total += len(features)
         except Exception as e:
             findings.append({"test": name, "error": str(e), "score": 0})
+            n_total += 1
+    # Context plausibility
     try:
         resp = _vlm(img, SYS_CONTEXT, USR_CONTEXT)
         if resp and not resp.startswith("VLM_ERROR"):
                 findings.append({"test": feat, "score": sc / len(context_features),
                                "note": parsed.get("explanation", "")[:100], "parent": "Context"})
                 scores.append(sc / len(context_features))
+                n_applicable += 1
+                n_total += 1
             findings.append({"test": "Context Plausibility", "vlm_analysis": parsed,
                            "score": sc, "confidence": cal_conf,
                            "note": parsed.get("explanation", "")[:200]})
             scores.append(sc)
+            n_applicable += 1
+            n_total += 1
         else:
             vlm_ok = False
     except:
         pass
+    # Fix 1: Confidence floor — distinguish genuinely neutral from cancelled-out
+    if scores:
+        avg = float(np.mean(scores))
+        # Check if scores genuinely agree on neutral vs. cancelling each other out
+        score_signs = [1 if s > 0.05 else (-1 if s < -0.05 else 0) for s in scores]
+        n_positive = sum(1 for s in score_signs if s > 0)
+        n_negative = sum(1 for s in score_signs if s < 0)
+        n_neutral = sum(1 for s in score_signs if s == 0)
+        if n_positive > 0 and n_negative > 0:
+            # Scores cancelled out — LOW confidence, not 0.4
+            agreement = max(n_positive, n_negative) / (n_positive + n_negative)
+            conf = min(1.0, 0.15 + 0.5 * abs(avg) * agreement)
+        elif n_neutral == len(score_signs):
+            # Everything genuinely neutral (VLM said 0 for everything) — low confidence
+            conf = 0.2
+        else:
+            # Scores agree in direction — confidence scales with magnitude
+            conf = min(1.0, 0.3 + 0.6 * abs(avg))
+        # Scale by coverage: fewer applicable features = lower confidence
+        coverage = n_applicable / max(n_total, 1)
+        conf *= max(0.3, coverage)
+    else:
+        avg = 0.0
+        conf = 0.1
     if not vlm_ok:
         conf *= 0.3
+    viol = [f["test"] for f in findings if f.get("score", 0) > 0.15
+            and "parent" not in f and not f.get("not_applicable")]
+    comp = [f["test"] for f in findings if f.get("score", 0) < -0.1
+            and "parent" not in f and not f.get("not_applicable")]
     rat = f"Semantic violations: {', '.join(viol[:5])}." if viol else \
           f"Semantically consistent: {', '.join(comp[:5])}." if comp else "Semantic inconclusive."
     for f in findings:
+        if f.get("note") and "parent" not in f and not f.get("not_applicable"):
             rat += f" [{f['test']}]: {f['note'][:100]}."
     return AgentEvidence("Semantic Consistency Agent", np.clip(avg, -1, 1), conf,