anky2002 committed on
Commit
0d484f2
·
verified ·
1 Parent(s): 60d6a7a

Upload agents/semantic_agent.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. agents/semantic_agent.py +254 -133
agents/semantic_agent.py CHANGED
@@ -1,5 +1,11 @@
1
- """FORENSIQ β€” Semantic Consistency Agent (23 features via VLM)
2
- Uses Qwen2.5-VL-72B with expert forensic prompts for deep visual reasoning.
 
 
 
 
 
 
3
  """
4
  import os, base64, io, json, re, numpy as np
5
  from PIL import Image
@@ -44,211 +50,326 @@ def _score(parsed):
44
  if v=="AUTHENTIC": return -0.4
45
  return 0.0
46
 
47
- # ═══ SYSTEM PROMPTS (23 features grouped into 5 VLM calls) ═══════════
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
- SYS_LIGHTING = """You are a world-class forensic photogrammetrist with 20+ years analyzing lighting in images for legal proceedings. You understand radiometry, photometry, and the physics of light transport at an expert level.
50
 
51
- Your analysis capabilities:
52
- 1. SHADOW GEOMETRY: Trace every shadow to its casting object. All shadow vectors must converge to consistent light source position(s). Shadow length encodes sun elevation via tan(ΞΈ) = object_height/shadow_length. Penumbra width encodes light source angular size.
53
- 2. INVERSE SQUARE LAW: Light intensity I = P/(4Ο€rΒ²). Surfaces equidistant from a point light must have equal irradiance. Check illumination falloff on flat surfaces (walls, floors, tables).
54
- 3. SPECULAR HIGHLIGHTS: Each specular reflection encodes light source direction via the reflection law (angle of incidence = angle of reflection). Check that specular highlights across different objects in the scene are consistent with the same light source(s).
55
- 4. AMBIENT OCCLUSION: Contact shadows and ambient occlusion should be darkest in concavities and where objects touch surfaces. AI often forgets these subtle cues.
56
- 5. COLOR TEMPERATURE: All illuminated surfaces under the same light should share its color temperature. Mixed lighting (warm/cool) must be physically motivated (window + lamp).
57
- 6. SUBSURFACE SCATTERING: Thin objects (ears, fingers, leaves) backlit by strong light should show red/warm translucency. AI rarely gets this right.
58
- 7. CAUSTICS: Light through transparent objects (glass, water) creates caustic patterns. If present, they must match the refracting geometry.
59
- 8. INTER-REFLECTIONS: Colored surfaces bounce colored light onto nearby surfaces. A red wall should tint nearby white objects slightly red.
60
 
61
- Report ALL violations with specific image region references. Be precise and clinical."""
62
 
63
- USR_LIGHTING = """Perform a complete lighting forensic analysis of this image.
64
 
65
- For each of these 8 sub-analyses, provide a separate assessment:
66
- 1. Shadow Direction Convergence β€” trace visible shadows, do they converge?
67
- 2. Inverse Square Law β€” does light intensity fall off naturally?
68
- 3. Specular Highlight Consistency β€” are reflections physically consistent?
69
- 4. Ambient Occlusion β€” are contact shadows present and correct?
70
- 5. Color Temperature Consistency β€” does illumination color match across the scene?
71
- 6. Subsurface Scattering β€” if thin translucent objects are visible, is SSS correct?
72
- 7. Caustics β€” if transparent objects are present, are caustics correct?
73
- 8. Inter-reflections β€” do colored surfaces bounce light correctly?
 
 
74
 
75
  Respond in JSON:
76
  {
77
- "shadow_convergent": true/false,
78
- "inverse_square_ok": true/false,
79
  "specular_consistent": true/false,
80
- "ambient_occlusion_ok": true/false,
81
  "color_temp_consistent": true/false,
82
  "sss_correct": true/false/null,
83
  "caustics_correct": true/false/null,
84
  "interreflections_ok": true/false/null,
85
- "anomalies": ["specific anomaly descriptions"],
86
  "confidence": 0.0-1.0,
87
  "verdict": "AUTHENTIC" or "SUSPICIOUS" or "MANIPULATED",
88
- "explanation": "detailed reasoning citing specific image regions"
89
  }"""
90
 
91
- SYS_ANATOMY = """You are a forensic anatomist and medical illustrator with encyclopedic knowledge of human body structure. AI-generated images violate anatomy in specific, detectable ways.
92
 
93
- Your detection capabilities:
94
- 1. HANDS: Exactly 5 fingers per hand. Each finger has 3 phalanges (thumb: 2). Joints bend in ONE direction only. Nails are on the dorsal side. Thumb opposes other fingers. Palm lines, knuckle creases, and tendons must be consistent.
95
- 2. FACIAL STRUCTURE: Bilateral symmetry (not perfect, but close). Eyes at same height, same size, same iris color. Ears at eye level, same size and shape. Teeth follow dental arch. Nostrils are symmetric.
96
- 3. BODY PROPORTIONS: Head β‰ˆ 1/7.5 of body height. Arm span β‰ˆ height. Legs β‰ˆ 50% of height. Elbow at waist level. Knee at mid-leg.
97
- 4. SKIN TEXTURE: Consistent pore density. Wrinkles follow muscle fiber directions. No texture discontinuities.
98
- 5. HAIR: Consistent direction of growth. No floating strands disconnected from scalp. Hairline follows natural patterns.
99
- 6. EYES: Catchlight reflections should match between eyes and match the lighting direction. Iris has consistent color and pattern. Sclera is white with subtle veins.
100
- 7. CLOTHING/ACCESSORIES: Fabric drapes under gravity. Seams are continuous. Buttons/zippers are physically connected. Jewelry doesn't float.
101
 
102
- Count fingers explicitly. Note any impossible joint angles. Check ear symmetry precisely."""
103
 
104
- USR_ANATOMY = """Perform a thorough anatomical forensic analysis of this image.
 
 
 
 
 
 
105
 
106
- Analyze each of these 7 categories:
107
- 1. HAND ANATOMY β€” Count fingers on each visible hand. Check joint angles, nail placement, proportions.
108
- 2. FACIAL SYMMETRY β€” Check eye alignment, ear symmetry, nose/mouth centering, teeth.
109
- 3. BODY PROPORTIONS β€” Check limb ratios, joint positions, head-to-body ratio.
110
- 4. SKIN & TEXTURE β€” Check pore consistency, wrinkle patterns, texture continuity.
111
- 5. HAIR β€” Check growth direction, hairline, strand connectivity.
112
- 6. EYE DETAILS β€” Check catchlights, iris consistency, sclera, eyelash direction.
113
- 7. CLOTHING PHYSICS β€” Check fabric draping, seam continuity, accessory placement.
114
 
115
- If NO people are visible, set contains_people=false.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
 
117
  Respond in JSON:
118
  {
119
  "contains_people": true/false,
120
  "hands_correct": true/false/null,
121
- "finger_count": "e.g. 'Left: 5, Right: 5' or 'Left: 6 (extra pinky)'",
122
  "face_symmetric": true/false/null,
123
  "proportions_ok": true/false/null,
124
  "skin_natural": true/false/null,
125
  "hair_natural": true/false/null,
126
  "eyes_consistent": true/false/null,
127
  "clothing_ok": true/false/null,
128
- "anomalies": ["specific anatomical errors"],
129
  "confidence": 0.0-1.0,
130
  "verdict": "AUTHENTIC" or "SUSPICIOUS" or "MANIPULATED",
131
- "explanation": "detailed reasoning with specific observations"
132
  }"""
133
 
134
- SYS_PHYSICS = """You are a forensic physicist specializing in physical plausibility analysis. Generative AI learns visual patterns but does NOT understand physics. Your job is to find violations.
135
-
136
- Your analysis domains:
137
- 1. MATERIAL BRDF: Metals are specular and reflect environment. Glass refracts and distorts background. Matte surfaces have diffuse reflection only. Wet surfaces have higher specularity. The same material must have consistent reflectance properties.
138
- 2. PERSPECTIVE GEOMETRY: All parallel lines in 3D converge to the same vanishing point. Vertical lines should remain vertical (unless tilted camera). Objects at the same distance should have the same scale.
139
- 3. GRAVITY & MECHANICS: Objects rest on surfaces, not float. Liquids are level. Fabric drapes downward. Hair falls with gravity (unless in motion). Structures must be load-bearing.
140
- 4. SCALE CONSISTENCY: Known objects (people, cars, doors, furniture) have known sizes. Check relative proportions.
141
- 5. TRANSPARENCY & REFRACTION: Glass distorts what's behind it. Water refracts objects below the surface. Transparency should be consistent with material thickness.
142
- 6. CONTACT & INTERACTION: Objects touching surfaces have contact shadows. Weight deforms soft surfaces. Reflections on surfaces show correct geometry.
143
- 7. MOTION CONSISTENCY: If motion blur is present, it should be consistent with object velocity and direction. Frozen motion should show physically plausible pose.
144
- 8. DEPTH ORDERING: Objects closer should occlude objects farther. No impossible overlaps."""
145
-
146
- USR_PHYSICS = """Analyze this image for violations of physical laws across 8 domains:
147
- 1. Material BRDF consistency β€” are surface reflections physically correct?
148
- 2. Perspective geometry β€” do parallel lines converge correctly?
149
- 3. Gravity and mechanics β€” do objects obey gravity?
150
- 4. Scale consistency β€” are objects proportional?
151
- 5. Transparency/refraction β€” do transparent objects distort correctly?
152
- 6. Contact and interaction β€” correct shadows and deformation?
153
- 7. Motion consistency β€” is blur/motion physically plausible?
154
- 8. Depth ordering β€” correct occlusion?
 
 
 
 
 
 
 
 
 
 
 
155
 
156
  Respond in JSON:
157
  {
158
- "brdf_consistent": true/false,
159
  "perspective_correct": true/false,
160
  "gravity_ok": true/false,
161
  "scale_consistent": true/false,
162
  "transparency_ok": true/false/null,
163
- "contact_correct": true/false,
164
  "motion_ok": true/false/null,
165
  "depth_ordering_ok": true/false,
166
- "anomalies": ["specific physics violations"],
167
  "confidence": 0.0-1.0,
168
  "verdict": "AUTHENTIC" or "SUSPICIOUS" or "MANIPULATED",
169
- "explanation": "detailed reasoning"
170
  }"""
171
 
172
- SYS_CONTEXT = """You are a forensic scene analyst who evaluates whether an image's content is contextually plausible. AI-generated images often combine elements that shouldn't coexist.
173
 
174
- Your analysis:
175
- 1. TEMPORAL CONSISTENCY: Season (foliage, clothing), time of day (sky, shadows, lighting), era (technology, fashion).
176
- 2. GEOGRAPHIC CONSISTENCY: Architecture style matches vegetation. Road markings match country. Signs are in expected language.
177
- 3. WEATHER CONSISTENCY: Sky matches ground conditions. Wet ground β†’ overcast or recent rain. Snow β†’ cold-weather attire.
178
- 4. SOCIAL PLAUSIBILITY: People's attire matches setting. Group interactions are natural. No impossible crowd configurations.
179
- 5. OBJECT RELATIONSHIPS: Furniture is functional. Appliances are connected. Tools are held correctly."""
 
 
 
 
 
 
 
 
 
180
 
181
- USR_CONTEXT = """Analyze contextual plausibility across 5 domains:
182
- 1. Temporal β€” season, time of day, era consistency
183
- 2. Geographic β€” architecture, vegetation, signage consistency
184
- 3. Weather β€” sky vs ground conditions
185
- 4. Social β€” attire, interactions, crowd plausibility
186
- 5. Object relationships β€” functional arrangement
 
 
 
 
 
 
 
 
 
 
 
187
 
188
  Respond in JSON:
189
  {
190
- "temporal_consistent": true/false,
 
 
191
  "geographic_consistent": true/false,
192
  "weather_consistent": true/false,
193
- "social_plausible": true/false,
 
194
  "objects_functional": true/false,
195
- "anomalies": ["specific contextual violations"],
196
  "confidence": 0.0-1.0,
197
  "verdict": "AUTHENTIC" or "SUSPICIOUS" or "MANIPULATED",
198
- "explanation": "reasoning"
199
  }"""
200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
  def run_semantic_agent(img):
202
- findings,scores=[],[]
203
- vlm_ok=True
204
 
205
- for sys_p,usr_p,name,features in [
206
- (SYS_LIGHTING, USR_LIGHTING, "Lighting Physics", ["Shadow Convergence","Inverse Square Law","Specular Consistency","Ambient Occlusion","Color Temperature","Subsurface Scattering","Caustics","Inter-reflections"]),
207
- (SYS_ANATOMY, USR_ANATOMY, "Anatomical Analysis", ["Hand Anatomy","Facial Symmetry","Body Proportions","Skin Texture","Hair","Eye Details","Clothing Physics"]),
208
- (SYS_PHYSICS, USR_PHYSICS, "Physical Plausibility", ["Material BRDF","Perspective Geometry","Gravity","Scale","Transparency","Contact","Motion","Depth Ordering"]),
 
 
 
 
 
 
209
  ]:
210
  try:
211
- resp=_vlm(img,sys_p,usr_p)
212
  if resp and not resp.startswith("VLM_ERROR"):
213
- parsed=_parse(resp)
214
- sc=_score(parsed)
215
- if name=="Anatomical Analysis" and not parsed.get("contains_people",True):
216
- sc=0.0
217
- # Create sub-findings for each feature
218
- anomalies=parsed.get("anomalies",[])
 
 
 
 
 
219
  for feat in features:
220
- findings.append({"test":feat,"score":sc/len(features),"note":parsed.get("explanation","")[:100],"parent":name})
221
- scores.append(sc/len(features))
222
- findings.append({"test":name,"vlm_analysis":parsed,"anomalies":anomalies,
223
- "score":sc,"confidence":parsed.get("confidence",0.5),
224
- "note":parsed.get("explanation","")[:200]})
 
 
 
 
225
  scores.append(sc)
226
  else:
227
- vlm_ok=False
228
  for feat in features:
229
- findings.append({"test":feat,"score":0.0,"note":"VLM unavailable","vlm_error":True})
230
  scores.append(0.0)
231
  except Exception as e:
232
- findings.append({"test":name,"error":str(e),"score":0})
233
 
234
- # Context plausibility (separate call)
235
  try:
236
- resp=_vlm(img,SYS_CONTEXT,USR_CONTEXT)
237
  if resp and not resp.startswith("VLM_ERROR"):
238
- parsed=_parse(resp); sc=_score(parsed)
239
- for feat in ["Temporal","Geographic","Weather","Social","Object Relations"]:
240
- findings.append({"test":feat+" Plausibility","score":sc/5,"note":parsed.get("explanation","")[:100]})
241
- scores.append(sc/5)
242
- else: vlm_ok=False
243
- except: pass
244
-
245
- avg=float(np.mean(scores)) if scores else 0.0
246
- conf=min(1.0,0.4+0.5*abs(avg))
247
- if not vlm_ok: conf*=0.3
248
- viol=[f["test"] for f in findings if f.get("score",0)>0.15 and "parent" not in f]
249
- comp=[f["test"] for f in findings if f.get("score",0)<-0.1 and "parent" not in f]
250
- rat=f"Semantic violations: {', '.join(viol[:5])}." if viol else f"Semantically consistent: {', '.join(comp[:5])}." if comp else "Semantic inconclusive."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
251
  for f in findings:
252
- if f.get("note") and "parent" not in f: rat+=f" [{f['test']}]: {f['note'][:100]}."
253
- return AgentEvidence("Semantic Consistency Agent",np.clip(avg,-1,1),conf,
254
- 0.0 if vlm_ok else 0.8, rat, [f for f in findings if "parent" not in f])
 
 
 
 
1
+ """FORENSIQ — Semantic Consistency Agent (31 features via VLM)
2
+ Uses Qwen2.5-VL-72B with calibrated forensic prompts.
3
+
4
+ Design principles applied from review:
5
+ - Qualitative inconsistency detection, NOT metric estimation from 2D images
6
+ - Explicit phenomenon ownership: Lighting owns illumination, Physics owns geometry/materials
7
+ - Confidence calibration instructions in every prompt
8
+ - Expanded Context prompt (5→8 sub-features)
9
  """
10
  import os, base64, io, json, re, numpy as np
11
  from PIL import Image
 
50
  if v=="AUTHENTIC": return -0.4
51
  return 0.0
52
 
53
+ # ── Shared calibration instruction appended to every system prompt ──────────
54
+ CONFIDENCE_CALIBRATION = """
55
+
56
+ CONFIDENCE CALIBRATION β€” CRITICAL:
57
+ Your confidence score MUST follow these rules:
58
+ - Default to 0.5 if you are uncertain or the evidence is ambiguous.
59
+ - Only use 0.7+ if you observe an UNAMBIGUOUS, SPECIFIC violation (e.g., a hand with 6 clearly countable fingers, shadows pointing in opposite directions from same light source).
60
+ - Only use 0.3 or below if the image is clearly, unambiguously consistent with reality and you can articulate exactly why.
61
+ - Use 0.4-0.6 for most images. Most images are ambiguous. Do NOT inflate confidence.
62
+ - If a sub-analysis is not applicable (no people, no text, no transparent objects), set that field to null and do NOT let it affect your overall confidence.
63
+ VLMs systematically overstate confidence. Resist this bias. When in doubt, stay near 0.5."""
64
+
65
+
66
+ # ═══════════════════════════════════════════════════════════════════════
67
+ # PROMPT 1: LIGHTING (8 features)
68
+ # Owns: ALL illumination phenomena β€” shadows, highlights, light color,
69
+ # light transport (SSS, caustics, inter-reflections)
70
+ # Does NOT own: material reflectance (that's Physics), geometry (Physics)
71
+ # ═══════════════════════════════════════════════════════════════════════
72
+
73
+ SYS_LIGHTING = """You are a forensic lighting analyst. You detect QUALITATIVE inconsistencies in illumination that indicate AI generation or manipulation. You work from visual appearance, not metric measurement.
74
+
75
+ IMPORTANT: You are analyzing a 2D image. You CANNOT compute exact distances, angles, or irradiance values. Instead, you look for VISIBLE INCONSISTENCIES that would be obvious to a trained observer:
76
+
77
+ Your 8 analysis domains (you OWN these β€” no other agent covers them):
78
 
79
+ 1. SHADOW DIRECTION: Do shadows from different objects in the scene appear to point toward consistent light source position(s)? Look for shadows that diverge when they should converge, or shadows pointing in incompatible directions. You do NOT need to compute exact angles β€” just assess whether the overall shadow pattern is self-consistent.
80
 
81
+ 2. SHADOW QUALITY: Are shadow edges (penumbra) consistent with the apparent light source? A small bright light produces hard shadows; overcast sky produces soft shadows. Do ALL shadows in the scene share the same hardness/softness? Mixed hard and soft shadows without explanation (e.g., multiple lights) is suspicious.
 
 
 
 
 
 
 
 
82
 
83
+ 3. SPECULAR HIGHLIGHTS: Bright reflections on shiny surfaces encode the light direction. If multiple shiny objects are visible, do their highlights appear to come from the same direction? If a person has catchlights in their eyes, do both eyes show highlights in the same position?
84
 
85
+ 4. AMBIENT OCCLUSION: Where objects meet surfaces (feet on floor, cup on table, book on shelf), there should be subtle darkening at the contact line. AI images frequently omit contact shadows or place them incorrectly. Check: are contact shadows present where objects touch?
86
 
87
+ 5. COLOR TEMPERATURE: Light from a single source should tint all surfaces the same hue. Look for: one side of a face warm-toned while the other is cool-toned without a motivating second light source. Indoor scenes with mixed warm/cool illumination should have visible light sources to explain it.
88
+
89
+ 6. SUBSURFACE SCATTERING: If you can see thin body parts (ears, nostrils, fingers between a light) backlit by a strong source, they should glow warm/red from blood beneath the skin. If present, is it consistent with the light direction? If absent when expected, flag it.
90
+
91
+ 7. CAUSTICS: If glass, water, or transparent objects are present near a surface, look for projected light patterns. Their absence in a brightly lit scene with transparent objects is mildly suspicious. If caustics ARE visible, do they match the shape and position of the transparent object?
92
+
93
+ 8. INTER-REFLECTIONS: Strongly colored surfaces near neutral surfaces should tint them. A red blanket next to a white wall should cast a subtle red tint. Look for color bleeding that's present OR suspiciously absent.""" + CONFIDENCE_CALIBRATION
94
+
95
+ USR_LIGHTING = """Analyze this image for lighting inconsistencies across all 8 domains.
96
+
97
+ For each, give a QUALITATIVE assessment based on what you can visually observe β€” do NOT attempt to compute metric values like exact angles or irradiance.
98
 
99
  Respond in JSON:
100
  {
101
+ "shadow_direction_consistent": true/false,
102
+ "shadow_quality_consistent": true/false,
103
  "specular_consistent": true/false,
104
+ "ambient_occlusion_present": true/false,
105
  "color_temp_consistent": true/false,
106
  "sss_correct": true/false/null,
107
  "caustics_correct": true/false/null,
108
  "interreflections_ok": true/false/null,
109
+ "anomalies": ["specific anomaly descriptions with image region references"],
110
  "confidence": 0.0-1.0,
111
  "verdict": "AUTHENTIC" or "SUSPICIOUS" or "MANIPULATED",
112
+ "explanation": "detailed reasoning citing what you observe, not what you compute"
113
  }"""
114
 
 
115
 
116
+ # ═══════════════════════════════════════════════════════════════════════
117
+ # PROMPT 2: ANATOMY (7 features)
118
+ # ═══════════════════════════════════════════════════════════════════════
119
+
120
+ SYS_ANATOMY = """You are a forensic anatomist. You detect anatomical errors in images that indicate AI generation.
 
 
 
121
 
122
+ DETECTION PROTOCOL:
123
 
124
+ 1. HANDS β€” This is your highest-priority check. Procedure:
125
+ a) Locate every visible hand in the image.
126
+ b) For each hand, COUNT fingers individually: thumb, index, middle, ring, pinky. State the count explicitly.
127
+ c) Verify each finger has correct joint count (thumb: 2 joints, others: 3 joints).
128
+ d) Check that joints bend only in anatomically possible directions.
129
+ e) Verify nails are on the correct (dorsal) side of each finger.
130
+ f) If hands are partially occluded, note what's visible vs. hidden.
131
 
132
+ 2. FACIAL SYMMETRY β€” Flag asymmetry ONLY if it would be noticeable to a casual observer at normal viewing distance. Natural faces have subtle asymmetry; AI faces often have GROSS asymmetry (one ear significantly higher/larger, one eye noticeably different shape, jawline shifted). Do NOT flag sub-pixel or barely perceptible differences.
 
 
 
 
 
 
 
133
 
134
+ 3. BODY PROPORTIONS β€” Check against standard human ratios: head β‰ˆ 1/7.5 of height, elbow at waist, fingertips at mid-thigh. Flag only OBVIOUS violations (forearm twice the length of upper arm, head clearly too large).
135
+
136
+ 4. SKIN TEXTURE β€” Look for abrupt texture changes: one patch of skin with visible pores adjacent to a smooth patch. Check for texture that transitions unnaturally between face regions.
137
+
138
+ 5. HAIR β€” Look for: strands that float disconnected from the scalp, hairline that dissolves into skin without natural transition, inconsistent hair direction (some strands defy gravity without explanation).
139
+
140
+ 6. EYE DETAILS β€” Catchlight reflections must appear in the same relative position in both eyes (same light source). Both irises should have the same color. Eyelashes should radiate outward from the lid margin.
141
+
142
+ 7. CLOTHING β€” Fabric must drape under gravity. Seams must be continuous (not disappearing/reappearing). Buttons must have buttonholes. Jewelry must connect to the body.""" + CONFIDENCE_CALIBRATION
143
+
144
+ USR_ANATOMY = """Perform anatomical forensic analysis.
145
+
146
+ MANDATORY: If hands are visible, explicitly count each finger on each hand. State your count clearly (e.g., "Left hand: thumb, index, middle, ring, pinky = 5 fingers").
147
+
148
+ If NO people are visible, set contains_people=false and skip all other fields.
149
 
150
  Respond in JSON:
151
  {
152
  "contains_people": true/false,
153
  "hands_correct": true/false/null,
154
+ "finger_count": "explicit count per hand, e.g. 'Left: 5 (thumb,index,middle,ring,pinky), Right: not visible'",
155
  "face_symmetric": true/false/null,
156
  "proportions_ok": true/false/null,
157
  "skin_natural": true/false/null,
158
  "hair_natural": true/false/null,
159
  "eyes_consistent": true/false/null,
160
  "clothing_ok": true/false/null,
161
+ "anomalies": ["specific anatomical errors with locations"],
162
  "confidence": 0.0-1.0,
163
  "verdict": "AUTHENTIC" or "SUSPICIOUS" or "MANIPULATED",
164
+ "explanation": "reasoning with specific observations β€” for hands, cite your finger count"
165
  }"""
166
 
167
+
168
+ # ═══════════════════════════════════════════════════════════════════════
169
+ # PROMPT 3: PHYSICAL PLAUSIBILITY (8 features)
170
+ # Owns: geometry, material appearance, structural mechanics, object interaction
171
+ # Does NOT own: illumination/shadows (that's Lighting), anatomy (that's Anatomy)
172
+ # Explicit partition from Lighting: this agent checks materials, perspective, and
173
+ # structural physics. It does NOT re-analyze shadows, highlights, or light color.
174
+ # ═══════════════════════════════════════════════════════════════════════
175
+
176
+ SYS_PHYSICS = """You are a forensic physicist. You detect violations of geometry, material properties, and structural mechanics in images.
177
+
178
+ SCOPE β€” You analyze these 8 domains. You do NOT analyze lighting/shadows/specular highlights (a separate Lighting Agent handles those). Focus ONLY on:
179
+
180
+ 1. MATERIAL APPEARANCE: Does each material look like what it claims to be? Metals should show environment reflections. Wood should have grain. Fabric should have texture. The SAME material across an image should have consistent appearance. Look for: a "metal" railing that looks like plastic, or glass that doesn't distort the background.
181
+
182
+ 2. PERSPECTIVE GEOMETRY: Parallel lines in the real world (edges of buildings, railroad tracks, road markings) must converge to consistent vanishing points. Check for: lines that should be parallel but converge to different points, vertical lines that lean inconsistently.
183
+
184
+ 3. GRAVITY & STRUCTURE: Everything must obey gravity. Objects rest on surfaces, don't float. Liquids have flat surfaces. Cantilevered structures need support. Fabric hangs down. Hair falls down (unless wind/motion is depicted). Look for: floating objects, impossible structural loads, upward-flowing fabric.
185
+
186
+ 4. SCALE & PROPORTION: Objects with known real-world sizes (people ~1.7m, doors ~2m, cars ~4.5m, chairs ~0.45m seat height) should be proportional to each other. Check for: a person who would be 3m tall next to a door, or a cup the size of a head.
187
+
188
+ 5. TRANSPARENCY: Glass transmits and distorts. Water refracts. Transparent objects should show what's behind them, distorted appropriately. Frosted glass blurs. Thick glass distorts more. Check for: glass that's perfectly clear with no distortion, or opaque "glass."
189
+
190
+ 6. CONTACT PHYSICS: Where objects rest on soft surfaces, there should be deformation (cushion under person, mattress under object). Where heavy objects rest on surfaces, the surface should show appropriate response.
191
+
192
+ 7. MOTION COHERENCE: If motion blur is present, its direction and magnitude should be consistent with the depicted motion. A moving car should have horizontal blur. A falling object should have vertical blur. An image with one object blurred and everything else sharp needs a fast-moving object OR selective focus.
193
+
194
+ 8. DEPTH & OCCLUSION: Nearer objects must occlude farther ones consistently. No object should appear to be simultaneously in front of AND behind another object. Occlusion boundaries should be clean (no "melting" edges).""" + CONFIDENCE_CALIBRATION
195
+
196
+ USR_PHYSICS = """Analyze this image for physics violations.
197
+
198
+ SCOPE REMINDER: Do NOT analyze lighting, shadows, or specular highlights β€” that is handled by a separate agent. Focus on materials, geometry, gravity, scale, transparency, contact, motion, and depth.
199
 
200
  Respond in JSON:
201
  {
202
+ "material_consistent": true/false,
203
  "perspective_correct": true/false,
204
  "gravity_ok": true/false,
205
  "scale_consistent": true/false,
206
  "transparency_ok": true/false/null,
207
+ "contact_ok": true/false,
208
  "motion_ok": true/false/null,
209
  "depth_ordering_ok": true/false,
210
+ "anomalies": ["specific physics violations β€” not lighting"],
211
  "confidence": 0.0-1.0,
212
  "verdict": "AUTHENTIC" or "SUSPICIOUS" or "MANIPULATED",
213
+ "explanation": "reasoning focused on geometry and material physics"
214
  }"""
215
 
 
216
 
217
+ # ═══════════════════════════════════════════════════════════════════════
218
+ # PROMPT 4: CONTEXT PLAUSIBILITY (8 features β€” expanded from 5)
219
+ # ═══════════════════════════════════════════════════════════════════════
220
+
221
+ SYS_CONTEXT = """You are a forensic scene analyst specializing in contextual coherence. AI-generated images often combine elements that could not physically coexist in the same real photograph.
222
+
223
+ Your 8 analysis domains:
224
+
225
+ 1. TEMPORAL SEASON: Vegetation, foliage color, and flower blooming must match. Snow on the ground requires bare or evergreen trees. Green deciduous leaves + snow is a contradiction. Clothing should match the apparent season.
226
+
227
+ 2. TIME OF DAY: Sky color/brightness must match shadow lengths and lighting direction. A bright blue sky requires short shadows (midday) or long shadows from a specific direction. Stars visible + brightly lit ground is contradictory.
228
+
229
+ 3. ERA / TECHNOLOGY ANACHRONISM: Visible technology (phones, cars, screens, signage style) should match the apparent era. A scene with 1950s architecture containing modern smartphones is suspicious. Fashion should match the apparent era of other objects.
230
+
231
+ 4. GEOGRAPHIC COHERENCE: Architecture style must match vegetation and climate. Tropical palm trees next to Northern European half-timbered houses is impossible. Road markings should match the apparent country (right-hand vs left-hand traffic, line styles). Visible text/signs should be in the expected language for the geography.
232
 
233
+ 5. WEATHER COHERENCE: Sky conditions must match ground conditions. Wet pavement requires recent rain or overcast sky. Dry dust in the air contradicts standing water. Snow requires freezing conditions (visible breath, winter clothing). Fog obscures distant objects.
234
+
235
+ 6. ATTIRE-SETTING MATCH: Beach clothing at a business meeting is impossible (unless clearly a party/casual scene). Winter coats in a tropical setting. Formal wear in a construction zone. Analyze whether clothing choices are plausible for the depicted location and activity.
236
+
237
+ 7. SIGN & LABEL COHERENCE: Visible signs, labels, and text should be appropriate for the scene type. A restaurant should show food-related signage. A hospital should show medical signage. Signs in a residential area should show house numbers, street names. Complete absence of expected signage in a commercial area is mildly suspicious.
238
+
239
+ 8. OBJECT FUNCTION & ARRANGEMENT: Furniture should be arranged for use (chairs face tables). Appliances should be connected (lamps plugged in, or at least near outlets). Tools should be held or stored correctly. Kitchen items should be in kitchens. Check for: objects that serve no function, impossible arrangements, or items placed where they'd be impractical.""" + CONFIDENCE_CALIBRATION
240
+
241
+ USR_CONTEXT = """Analyze contextual plausibility across all 8 domains:
242
+ 1. Temporal/Season β€” vegetation vs clothing vs weather
243
+ 2. Time of Day β€” sky vs shadows vs lighting
244
+ 3. Era/Technology β€” anachronistic objects
245
+ 4. Geographic β€” architecture vs vegetation vs signage language
246
+ 5. Weather β€” sky vs ground conditions vs attire
247
+ 6. Attire-Setting β€” clothing appropriate for location/activity
248
+ 7. Sign/Label Coherence β€” signage matches scene type
249
+ 8. Object Arrangement β€” functional, plausible placement
250
 
251
  Respond in JSON:
252
  {
253
+ "season_consistent": true/false,
254
+ "time_of_day_consistent": true/false,
255
+ "era_consistent": true/false,
256
  "geographic_consistent": true/false,
257
  "weather_consistent": true/false,
258
+ "attire_setting_match": true/false,
259
+ "signage_coherent": true/false,
260
  "objects_functional": true/false,
261
+ "anomalies": ["specific contextual violations with reasoning"],
262
  "confidence": 0.0-1.0,
263
  "verdict": "AUTHENTIC" or "SUSPICIOUS" or "MANIPULATED",
264
+ "explanation": "detailed reasoning per domain"
265
  }"""
266
 
267
+
268
+ # ═══════════════════════════════════════════════════════════════════════
269
+ # AGENT RUNNER
270
+ # ═══════════════════════════════════════════════════════════════════════
271
+
272
# VLM confidence temperature — applied before feeding into Bayesian Eq.1.
# VLMs systematically overstate confidence; dividing the logit by a value
# greater than 1 compresses the reported confidence toward 0.5.
VLM_CONFIDENCE_TEMPERATURE = 2.0


def _calibrate_vlm_confidence(raw_conf: float) -> float:
    """Post-process a VLM-reported confidence with temperature scaling.

    The value is mapped to logit space, divided by
    ``VLM_CONFIDENCE_TEMPERATURE`` and mapped back through a sigmoid, which
    pulls extreme values toward 0.5.

    Args:
        raw_conf: Confidence as reported by the model. Anything ``float()``
            accepts is allowed (e.g. the string ``"0.8"``).

    Returns:
        The calibrated confidence as a plain ``float``. Values that cannot be
        interpreted as a probability (non-numeric, NaN/inf, or outside
        ``[0, 1]``) yield the neutral ``0.5``.
    """
    try:
        conf = float(raw_conf)
    except (TypeError, ValueError):
        # JSON null, free text such as "high", lists, ... carry no usable signal.
        return 0.5

    if not np.isfinite(conf) or conf < 0.0 or conf > 1.0:
        return 0.5

    # Exactly 0 or 1 would make the logit infinite. Clamp instead of returning
    # 0.5 so the mapping stays monotonic (1.0 must not score below 0.99).
    eps = 1e-3
    conf = min(max(conf, eps), 1.0 - eps)

    logit = np.log(conf / (1.0 - conf))
    scaled = logit / VLM_CONFIDENCE_TEMPERATURE
    return float(1.0 / (1.0 + np.exp(-scaled)))
284
+
285
+
286
def _semantic_note(parsed, limit):
    """Return the parsed VLM explanation as text, truncated to *limit* chars."""
    explanation = parsed.get("explanation")
    if explanation is None:
        return ""
    # The model may return a dict/list here; stringify so slicing cannot raise.
    return str(explanation)[:limit]


def _safe_calibrated_confidence(raw_conf):
    """Calibrate a VLM confidence, falling back to 0.5 when it is unusable."""
    try:
        return _calibrate_vlm_confidence(raw_conf)
    except (TypeError, ValueError):
        return 0.5


def _semantic_group_findings(parsed, name, parent, features):
    """Build the per-feature and group-level findings for one parsed VLM reply.

    Returns ``(findings, scores)``. Nothing is appended to shared state, so a
    failure part-way through cannot leave half a group behind.
    """
    sc = _score(parsed)
    raw_conf = parsed.get("confidence", 0.5)
    cal_conf = _safe_calibrated_confidence(raw_conf)

    # Anatomy checks are meaningless when the model reports no people.
    if name == "Anatomical Analysis" and not parsed.get("contains_people", True):
        sc = 0.0

    share = sc / len(features)
    short_note = _semantic_note(parsed, 100)
    group_findings = [
        {"test": feat, "score": share, "note": short_note, "parent": parent}
        for feat in features
    ]
    group_findings.append({
        "test": name,
        "vlm_analysis": parsed,
        "anomalies": parsed.get("anomalies", []),
        "score": sc,
        "confidence": cal_conf,
        "raw_vlm_confidence": raw_conf,
        "calibrated_confidence": cal_conf,
        "note": _semantic_note(parsed, 200),
    })
    # NOTE(review): the group score is counted once in full and once more
    # spread over its features, exactly as before; confirm this weighting is
    # intended.
    group_scores = [share] * len(features) + [sc]
    return group_findings, group_scores


def run_semantic_agent(img):
    """Run the four VLM semantic checks on *img* and aggregate the evidence.

    Each group (lighting, anatomy, physics, context) is one ``_vlm`` call whose
    reply is parsed with ``_parse`` and scored with ``_score``. Per-feature
    findings feed the average score but only group-level findings (those
    without a ``"parent"`` key) are returned and quoted in the rationale.

    Args:
        img: The image to analyse; passed unchanged to ``_vlm``.

    Returns:
        An ``AgentEvidence`` built from the agent name, the mean score clipped
        to ``[-1, 1]``, a confidence derived from ``|mean|`` (scaled by 0.3 if
        any group failed), ``0.0``/``0.8`` depending on whether every group
        succeeded (presumably a failure/uncertainty weight — confirm against
        ``AgentEvidence``), the rationale text and the group-level findings.
    """
    groups = [
        (SYS_LIGHTING, USR_LIGHTING, "Lighting Physics", "Lighting Physics",
         ["Shadow Direction", "Shadow Quality", "Specular Consistency", "Ambient Occlusion",
          "Color Temperature", "Subsurface Scattering", "Caustics", "Inter-reflections"]),
        (SYS_ANATOMY, USR_ANATOMY, "Anatomical Analysis", "Anatomical Analysis",
         ["Hand Anatomy", "Facial Symmetry", "Body Proportions", "Skin Texture",
          "Hair Consistency", "Eye Details", "Clothing Physics"]),
        (SYS_PHYSICS, USR_PHYSICS, "Physical Plausibility", "Physical Plausibility",
         ["Material Appearance", "Perspective Geometry", "Gravity & Structure",
          "Scale & Proportion", "Transparency", "Contact Physics", "Motion Coherence",
          "Depth & Occlusion"]),
        # Context plausibility (expanded to 8 sub-features)
        (SYS_CONTEXT, USR_CONTEXT, "Context Plausibility", "Context",
         ["Season Consistency", "Time-of-Day", "Era/Technology",
          "Geographic Coherence", "Weather Coherence",
          "Attire-Setting Match", "Sign/Label Coherence", "Object Arrangement"]),
    ]

    findings, scores = [], []
    vlm_ok = True

    for sys_p, usr_p, name, parent, features in groups:
        try:
            resp = _vlm(img, sys_p, usr_p)
            if not resp or resp.startswith("VLM_ERROR"):
                vlm_ok = False
                for feat in features:
                    findings.append({"test": feat, "score": 0.0,
                                     "note": "VLM unavailable", "vlm_error": True})
                    scores.append(0.0)
                continue
            group_findings, group_scores = _semantic_group_findings(
                _parse(resp), name, parent, features)
        except Exception as e:
            # Best-effort agent: never crash the pipeline, but make the failure
            # visible and mark the run as degraded instead of hiding it.
            vlm_ok = False
            findings.append({"test": name, "error": str(e), "score": 0})
            continue
        findings.extend(group_findings)
        scores.extend(group_scores)

    avg = float(np.mean(scores)) if scores else 0.0
    conf = min(1.0, 0.4 + 0.5 * abs(avg))
    if not vlm_ok:
        conf *= 0.3

    top_level = [f for f in findings if "parent" not in f]
    viol = [f["test"] for f in top_level if f.get("score", 0) > 0.15]
    comp = [f["test"] for f in top_level if f.get("score", 0) < -0.1]
    if viol:
        rat = f"Semantic violations: {', '.join(viol[:5])}."
    elif comp:
        rat = f"Semantically consistent: {', '.join(comp[:5])}."
    else:
        rat = "Semantic inconclusive."
    rat += "".join(f" [{f['test']}]: {f['note'][:100]}."
                   for f in top_level if f.get("note"))

    return AgentEvidence("Semantic Consistency Agent", np.clip(avg, -1, 1), conf,
                         0.0 if vlm_ok else 0.8, rat, top_level)