anky2002 committed on
Commit
8b4b288
·
verified ·
1 Parent(s): d5f3cd5

Upload agents/semantic_agent.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. agents/semantic_agent.py +212 -316
agents/semantic_agent.py CHANGED
@@ -1,358 +1,254 @@
 
 
1
  """
2
- FORENSIQ — Semantic Consistency Agent (VLM-powered)
3
- Uses Qwen2.5-VL via HF Inference to evaluate:
4
- - Lighting consistency (shadow convergence, inverse square law)
5
- - Material properties (BRDF anomalies, reflectance)
6
- - Anatomical errors (finger count, joint angles, facial symmetry)
7
- - Physical plausibility (gravity, perspective, scale)
8
- """
9
-
10
- import os
11
- import base64
12
- import io
13
- import json
14
- import re
15
- import numpy as np
16
  from PIL import Image
17
  from typing import Dict, Any, Optional
18
- from dataclasses import dataclass
19
-
20
  from agents.optical_agent import AgentEvidence
21
 
22
- # ─── VLM Interface ───────────────────────────────────────────────────
23
-
24
- def _encode_image_b64(img: Image.Image, max_size: int = 1024) -> str:
25
- """Encode PIL image as base64 JPEG for API submission."""
26
- # Resize if too large
27
- w, h = img.size
28
- if max(w, h) > max_size:
29
- ratio = max_size / max(w, h)
30
- img = img.resize((int(w * ratio), int(h * ratio)), Image.LANCZOS)
31
- buf = io.BytesIO()
32
- img.convert("RGB").save(buf, format="JPEG", quality=90)
33
- return base64.b64encode(buf.getvalue()).decode("utf-8")
34
 
35
-
36
- def _call_vlm(img: Image.Image, system_prompt: str, user_prompt: str) -> Optional[str]:
37
- """Call Qwen2.5-VL-7B via HF router (OpenAI-compatible endpoint)."""
38
  try:
39
  from openai import OpenAI
40
- except ImportError:
41
- return None
42
-
43
- token = os.environ.get("HF_TOKEN", "")
44
- if not token:
45
- return None
46
-
47
- try:
48
- client = OpenAI(
49
- base_url="https://router.huggingface.co/v1",
50
- api_key=token,
51
- )
52
-
53
- b64 = _encode_image_b64(img)
54
-
55
- response = client.chat.completions.create(
56
- model="Qwen/Qwen2.5-VL-72B-Instruct",
57
- messages=[
58
- {"role": "system", "content": system_prompt},
59
- {
60
- "role": "user",
61
- "content": [
62
- {
63
- "type": "image_url",
64
- "image_url": {"url": f"data:image/jpeg;base64,{b64}"},
65
- },
66
- {"type": "text", "text": user_prompt},
67
- ],
68
- },
69
- ],
70
- max_tokens=1500,
71
- temperature=0.1,
72
- )
73
- return response.choices[0].message.content
74
- except Exception as e:
75
- return f"VLM_ERROR: {str(e)}"
76
-
77
-
78
- def _parse_vlm_json(text: str) -> Dict[str, Any]:
79
- """Extract JSON from VLM response (handles markdown code blocks)."""
80
- if text is None:
81
- return {}
82
- # Try to find JSON block
83
- json_match = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', text, re.DOTALL)
84
- if json_match:
85
- try:
86
- return json.loads(json_match.group(1))
87
- except json.JSONDecodeError:
88
- pass
89
- # Try direct parse
90
  try:
91
- return json.loads(text)
92
- except json.JSONDecodeError:
93
- pass
94
- # Try to find any {...} block
95
- brace_match = re.search(r'\{[^{}]*\}', text, re.DOTALL)
96
- if brace_match:
97
- try:
98
- return json.loads(brace_match.group(0))
99
- except json.JSONDecodeError:
100
- pass
101
- return {"raw_response": text}
102
-
103
-
104
- # ─── Lighting Consistency ────────────────────────────────────────────
105
-
106
- LIGHTING_SYSTEM_PROMPT = """You are an expert forensic image analyst specializing in lighting physics and photogrammetry. Your task is to analyze images for lighting consistency violations that indicate AI generation or manipulation.
107
-
108
- You understand:
109
- - Shadow direction convergence (all shadows must trace back to consistent light source positions)
110
- - Inverse square law (light intensity falls off as 1/rΒ²)
111
- - Specular highlight placement (must be consistent with light source direction)
112
- - Ambient vs direct lighting ratios
113
- - Multiple light source scenarios
114
- - Reflection consistency in eyes, glasses, and shiny surfaces
115
-
116
- Be precise, clinical, and evidence-based. Cite specific image regions when noting anomalies."""
117
-
118
- LIGHTING_USER_PROMPT = """Analyze this image for lighting consistency. Examine:
119
- 1. Shadow directions β€” do all shadows point to consistent light source(s)?
120
- 2. Shadow softness β€” is it consistent with the apparent light source distance?
121
- 3. Specular highlights β€” are reflections in eyes, skin, and objects consistent?
122
- 4. Light falloff β€” does brightness decrease naturally with distance from light?
123
- 5. Ambient lighting β€” is the ambient-to-direct ratio physically plausible?
124
-
125
- Respond in JSON format:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  {
127
- "lighting_consistent": true/false,
128
- "shadow_direction_consistent": true/false,
129
- "specular_highlights_consistent": true/false,
130
- "light_falloff_natural": true/false,
131
- "anomalies": ["list of specific anomalies found, empty if none"],
 
 
 
 
132
  "confidence": 0.0-1.0,
133
  "verdict": "AUTHENTIC" or "SUSPICIOUS" or "MANIPULATED",
134
- "explanation": "detailed reasoning"
135
  }"""
136
 
 
137
 
138
- def analyze_lighting(img: Image.Image) -> Dict[str, Any]:
139
- response = _call_vlm(img, LIGHTING_SYSTEM_PROMPT, LIGHTING_USER_PROMPT)
140
- if response and not response.startswith("VLM_ERROR"):
141
- parsed = _parse_vlm_json(response)
142
- verdict = parsed.get("verdict", "UNKNOWN")
143
- anomalies = parsed.get("anomalies", [])
144
- confidence = parsed.get("confidence", 0.5)
145
-
146
- if verdict == "MANIPULATED":
147
- score = 0.7
148
- elif verdict == "SUSPICIOUS":
149
- score = 0.4
150
- elif verdict == "AUTHENTIC":
151
- score = -0.4
152
- else:
153
- score = 0.0
154
-
155
- return {
156
- "test": "Lighting Consistency",
157
- "vlm_analysis": parsed,
158
- "anomalies": anomalies,
159
- "score": score,
160
- "confidence": confidence,
161
- "note": parsed.get("explanation", response[:200]),
162
- }
163
- else:
164
- return {
165
- "test": "Lighting Consistency",
166
- "score": 0.0,
167
- "note": f"VLM unavailable: {response or 'no HF_TOKEN'}",
168
- "vlm_error": True,
169
- }
170
-
171
 
172
- # ─── Anatomical Analysis ────────────────────────────────────────────
173
 
174
- ANATOMY_SYSTEM_PROMPT = """You are an expert forensic analyst specializing in human anatomy verification in images. AI-generated images frequently contain anatomical errors that are physically impossible.
175
 
176
- You have encyclopedic knowledge of:
177
- - Hand anatomy: finger count (exactly 5 per hand), joint bending directions, nail placement, proportions
178
- - Facial anatomy: bilateral symmetry, ear alignment, eye spacing, teeth regularity
179
- - Body proportions: limb ratios, joint angles, skeletal plausibility
180
- - Skin texture: pore consistency, wrinkle patterns, hair follicle distribution
181
- - Clothing physics: fabric draping, seam continuity, button alignment
 
 
182
 
183
- AI-generated images commonly fail on: extra/missing fingers, impossible joint angles, asymmetric ears, teeth anomalies, melted/merged body parts, clothing that defies physics."""
184
 
185
- ANATOMY_USER_PROMPT = """Carefully examine this image for anatomical correctness. Check:
186
- 1. Hands: Count fingers on each visible hand. Check joint angles and proportions.
187
- 2. Face: Check bilateral symmetry, ear alignment, eye consistency, teeth.
188
- 3. Body: Check limb proportions, joint angles, body part connections.
189
- 4. Skin/Hair: Check texture consistency, pore patterns, hairline.
190
- 5. Clothing: Check seam continuity, fabric physics, accessory consistency.
191
-
192
- Respond in JSON format:
193
  {
194
  "contains_people": true/false,
195
- "finger_count_correct": true/false/null,
196
- "facial_symmetry_ok": true/false/null,
197
- "body_proportions_ok": true/false/null,
198
- "skin_texture_natural": true/false/null,
199
- "clothing_physics_ok": true/false/null,
200
- "anomalies": ["list of specific anatomical errors found"],
 
 
 
201
  "confidence": 0.0-1.0,
202
  "verdict": "AUTHENTIC" or "SUSPICIOUS" or "MANIPULATED",
203
  "explanation": "detailed reasoning with specific observations"
204
  }"""
205
 
206
-
207
- def analyze_anatomy(img: Image.Image) -> Dict[str, Any]:
208
- response = _call_vlm(img, ANATOMY_SYSTEM_PROMPT, ANATOMY_USER_PROMPT)
209
- if response and not response.startswith("VLM_ERROR"):
210
- parsed = _parse_vlm_json(response)
211
-
212
- if not parsed.get("contains_people", True):
213
- return {
214
- "test": "Anatomical Analysis",
215
- "score": 0.0,
216
- "note": "No people detected in image β€” anatomical analysis not applicable",
217
- "vlm_analysis": parsed,
218
- }
219
-
220
- verdict = parsed.get("verdict", "UNKNOWN")
221
- anomalies = parsed.get("anomalies", [])
222
-
223
- if verdict == "MANIPULATED":
224
- score = 0.8
225
- elif verdict == "SUSPICIOUS":
226
- score = 0.4
227
- elif verdict == "AUTHENTIC":
228
- score = -0.4
229
- else:
230
- score = 0.0
231
-
232
- return {
233
- "test": "Anatomical Analysis",
234
- "vlm_analysis": parsed,
235
- "anomalies": anomalies,
236
- "score": score,
237
- "confidence": parsed.get("confidence", 0.5),
238
- "note": parsed.get("explanation", response[:200]),
239
- }
240
- else:
241
- return {
242
- "test": "Anatomical Analysis",
243
- "score": 0.0,
244
- "note": f"VLM unavailable: {response or 'no HF_TOKEN'}",
245
- "vlm_error": True,
246
- }
247
-
248
-
249
- # ─── Material / Physics Plausibility ────────────────────────────────
250
-
251
- PHYSICS_SYSTEM_PROMPT = """You are an expert forensic physicist who analyzes images for violations of physical laws. AI-generated images often violate basic physics because generative models learn visual patterns without understanding underlying physics.
252
-
253
- Your expertise covers:
254
- - Material reflectance: metals should reflect surroundings, glass should refract, matte surfaces shouldn't have specular highlights
255
- - BRDF consistency: bidirectional reflectance should be consistent across the same material
256
- - Gravity and structural physics: objects should rest on surfaces, liquids should be level, structures should be load-bearing
257
- - Perspective geometry: parallel lines should converge to consistent vanishing points
258
- - Scale consistency: known objects should be proportional to each other
259
- - Transparency/refraction: glass, water, and transparent objects should distort backgrounds correctly"""
260
-
261
- PHYSICS_USER_PROMPT = """Analyze this image for physical plausibility violations:
262
- 1. Material properties: Are reflections, textures, and surface properties physically correct?
263
- 2. Perspective: Do parallel lines converge to consistent vanishing points?
264
- 3. Scale: Are objects proportional to each other and known references?
265
- 4. Gravity: Do objects rest naturally? Are liquids level? Do fabrics drape correctly?
266
- 5. Transparency: Do glass, water, or transparent objects refract/distort correctly?
267
-
268
- Respond in JSON format:
269
  {
270
- "materials_consistent": true/false,
271
  "perspective_correct": true/false,
 
272
  "scale_consistent": true/false,
273
- "gravity_plausible": true/false,
274
- "anomalies": ["list of specific physics violations"],
 
 
 
275
  "confidence": 0.0-1.0,
276
  "verdict": "AUTHENTIC" or "SUSPICIOUS" or "MANIPULATED",
277
  "explanation": "detailed reasoning"
278
  }"""
279
 
 
280
 
281
- def analyze_physics(img: Image.Image) -> Dict[str, Any]:
282
- response = _call_vlm(img, PHYSICS_SYSTEM_PROMPT, PHYSICS_USER_PROMPT)
283
- if response and not response.startswith("VLM_ERROR"):
284
- parsed = _parse_vlm_json(response)
285
- verdict = parsed.get("verdict", "UNKNOWN")
286
- anomalies = parsed.get("anomalies", [])
287
-
288
- if verdict == "MANIPULATED":
289
- score = 0.6
290
- elif verdict == "SUSPICIOUS":
291
- score = 0.3
292
- elif verdict == "AUTHENTIC":
293
- score = -0.4
294
- else:
295
- score = 0.0
296
 
297
- return {
298
- "test": "Physical Plausibility",
299
- "vlm_analysis": parsed,
300
- "anomalies": anomalies,
301
- "score": score,
302
- "confidence": parsed.get("confidence", 0.5),
303
- "note": parsed.get("explanation", response[:200]),
304
- }
305
- else:
306
- return {
307
- "test": "Physical Plausibility",
308
- "score": 0.0,
309
- "note": f"VLM unavailable: {response or 'no HF_TOKEN'}",
310
- "vlm_error": True,
311
- }
312
 
 
 
 
 
 
 
 
 
 
 
 
 
313
 
314
- # ─── Main Agent Entry Point ─────────────────────────────────────────
315
- def run_semantic_agent(img: Image.Image) -> AgentEvidence:
316
- """Run all semantic consistency tests via VLM."""
317
- findings = []
318
- scores = []
319
- vlm_available = True
320
-
321
- for fn in [analyze_lighting, analyze_anatomy, analyze_physics]:
 
322
  try:
323
- result = fn(img)
324
- findings.append(result)
325
- scores.append(result["score"])
326
- if result.get("vlm_error"):
327
- vlm_available = False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
328
  except Exception as e:
329
- findings.append({"test": fn.__name__, "error": str(e), "score": 0})
330
-
331
- avg_score = float(np.mean(scores)) if scores else 0.0
332
- confidence = min(1.0, 0.4 + 0.5 * abs(avg_score))
333
-
334
- if not vlm_available:
335
- confidence *= 0.3 # Low confidence without VLM
336
-
337
- violations = [f["test"] for f in findings if f.get("score", 0) > 0.2]
338
- compliant = [f["test"] for f in findings if f.get("score", 0) < -0.1]
339
-
340
- if violations:
341
- rationale = f"Semantic violations detected: {', '.join(violations)}."
342
- elif compliant:
343
- rationale = f"Semantic consistency confirmed: {', '.join(compliant)}."
344
- else:
345
- rationale = "Semantic analysis inconclusive."
346
-
 
347
  for f in findings:
348
- if f.get("note"):
349
- rationale += f" [{f['test']}]: {f['note'][:150]}."
350
-
351
- return AgentEvidence(
352
- agent_name="Semantic Consistency Agent",
353
- violation_score=np.clip(avg_score, -1, 1),
354
- confidence=confidence,
355
- failure_prob=0.0 if vlm_available else 0.8,
356
- rationale=rationale,
357
- sub_findings=findings,
358
- )
 
1
+ """FORENSIQ — Semantic Consistency Agent (23 features via VLM)
2
+ Uses Qwen2.5-VL-72B with expert forensic prompts for deep visual reasoning.
3
  """
4
+ import os, base64, io, json, re, numpy as np
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  from PIL import Image
6
  from typing import Dict, Any, Optional
 
 
7
  from agents.optical_agent import AgentEvidence
8
 
9
def _b64(img, mx=1024):
    """Encode a PIL image as a base64 JPEG string for API submission.

    Args:
        img: PIL image to encode.
        mx: Maximum allowed length of the longer side, in pixels. Larger
            images are scaled down proportionally before encoding.

    Returns:
        The JPEG bytes (quality 90, RGB) encoded as a base64 ``str``.
    """
    w, h = img.size
    if max(w, h) > mx:
        r = mx / max(w, h)
        # Clamp to 1px: on extreme aspect ratios int() truncates the short
        # side to 0, which resize() rejects.
        new_size = (max(1, int(w * r)), max(1, int(h * r)))
        img = img.resize(new_size, Image.LANCZOS)
    buf = io.BytesIO()
    img.convert("RGB").save(buf, "JPEG", quality=90)
    return base64.b64encode(buf.getvalue()).decode()
 
 
 
 
 
 
 
 
13
 
14
+ def _vlm(img, sys_prompt, user_prompt):
 
 
15
  try:
16
  from openai import OpenAI
17
+ except ImportError: return None
18
+ token=os.environ.get("HF_TOKEN","")
19
+ if not token: return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  try:
21
+ client=OpenAI(base_url="https://router.huggingface.co/v1",api_key=token)
22
+ b64=_b64(img)
23
+ resp=client.chat.completions.create(model="Qwen/Qwen2.5-VL-72B-Instruct",messages=[
24
+ {"role":"system","content":sys_prompt},
25
+ {"role":"user","content":[{"type":"image_url","image_url":{"url":f"data:image/jpeg;base64,{b64}"}},{"type":"text","text":user_prompt}]}
26
+ ],max_tokens=2000,temperature=0.1)
27
+ return resp.choices[0].message.content
28
+ except Exception as e: return f"VLM_ERROR: {e}"
29
+
30
+ def _parse(text):
31
+ if not text: return {}
32
+ for pattern in [r'```(?:json)?\s*(\{.*?\})\s*```', r'(\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\})']:
33
+ m=re.search(pattern,text,re.DOTALL)
34
+ if m:
35
+ try: return json.loads(m.group(1))
36
+ except: pass
37
+ try: return json.loads(text)
38
+ except: return {"raw":text}
39
+
40
+ def _score(parsed):
41
+ v=parsed.get("verdict","UNKNOWN")
42
+ if v=="MANIPULATED": return 0.7
43
+ if v=="SUSPICIOUS": return 0.4
44
+ if v=="AUTHENTIC": return -0.4
45
+ return 0.0
46
+
47
+ # ═══ SYSTEM PROMPTS (23 features in 3 VLM calls, plus 5 context checks in a 4th call) ═══════════
48
+
49
# System prompt for the lighting group (8 sub-analyses, one VLM call).
SYS_LIGHTING = """You are a world-class forensic photogrammetrist with 20+ years analyzing lighting in images for legal proceedings. You understand radiometry, photometry, and the physics of light transport at an expert level.

Your analysis capabilities:
1. SHADOW GEOMETRY: Trace every shadow to its casting object. All shadow vectors must converge to consistent light source position(s). Shadow length encodes sun elevation via tan(θ) = object_height/shadow_length. Penumbra width encodes light source angular size.
2. INVERSE SQUARE LAW: Light intensity I = P/(4πr²). Surfaces equidistant from a point light must have equal irradiance. Check illumination falloff on flat surfaces (walls, floors, tables).
3. SPECULAR HIGHLIGHTS: Each specular reflection encodes light source direction via the reflection law (angle of incidence = angle of reflection). Check that specular highlights across different objects in the scene are consistent with the same light source(s).
4. AMBIENT OCCLUSION: Contact shadows and ambient occlusion should be darkest in concavities and where objects touch surfaces. AI often forgets these subtle cues.
5. COLOR TEMPERATURE: All illuminated surfaces under the same light should share its color temperature. Mixed lighting (warm/cool) must be physically motivated (window + lamp).
6. SUBSURFACE SCATTERING: Thin objects (ears, fingers, leaves) backlit by strong light should show red/warm translucency. AI rarely gets this right.
7. CAUSTICS: Light through transparent objects (glass, water) creates caustic patterns. If present, they must match the refracting geometry.
8. INTER-REFLECTIONS: Colored surfaces bounce colored light onto nearby surfaces. A red wall should tint nearby white objects slightly red.

Report ALL violations with specific image region references. Be precise and clinical."""
62
+
63
# User prompt for the lighting group; asks for a JSON reply with a "verdict".
USR_LIGHTING = """Perform a complete lighting forensic analysis of this image.

For each of these 8 sub-analyses, provide a separate assessment:
1. Shadow Direction Convergence — trace visible shadows, do they converge?
2. Inverse Square Law — does light intensity fall off naturally?
3. Specular Highlight Consistency — are reflections physically consistent?
4. Ambient Occlusion — are contact shadows present and correct?
5. Color Temperature Consistency — does illumination color match across the scene?
6. Subsurface Scattering — if thin translucent objects are visible, is SSS correct?
7. Caustics — if transparent objects are present, are caustics correct?
8. Inter-reflections — do colored surfaces bounce light correctly?

Respond in JSON:
{
"shadow_convergent": true/false,
"inverse_square_ok": true/false,
"specular_consistent": true/false,
"ambient_occlusion_ok": true/false,
"color_temp_consistent": true/false,
"sss_correct": true/false/null,
"caustics_correct": true/false/null,
"interreflections_ok": true/false/null,
"anomalies": ["specific anomaly descriptions"],
"confidence": 0.0-1.0,
"verdict": "AUTHENTIC" or "SUSPICIOUS" or "MANIPULATED",
"explanation": "detailed reasoning citing specific image regions"
}"""
90
 
91
# System prompt for the anatomy group (7 categories, one VLM call).
SYS_ANATOMY = """You are a forensic anatomist and medical illustrator with encyclopedic knowledge of human body structure. AI-generated images violate anatomy in specific, detectable ways.

Your detection capabilities:
1. HANDS: Exactly 5 fingers per hand. Each finger has 3 phalanges (thumb: 2). Joints bend in ONE direction only. Nails are on the dorsal side. Thumb opposes other fingers. Palm lines, knuckle creases, and tendons must be consistent.
2. FACIAL STRUCTURE: Bilateral symmetry (not perfect, but close). Eyes at same height, same size, same iris color. Ears at eye level, same size and shape. Teeth follow dental arch. Nostrils are symmetric.
3. BODY PROPORTIONS: Head ≈ 1/7.5 of body height. Arm span ≈ height. Legs ≈ 50% of height. Elbow at waist level. Knee at mid-leg.
4. SKIN TEXTURE: Consistent pore density. Wrinkles follow muscle fiber directions. No texture discontinuities.
5. HAIR: Consistent direction of growth. No floating strands disconnected from scalp. Hairline follows natural patterns.
6. EYES: Catchlight reflections should match between eyes and match the lighting direction. Iris has consistent color and pattern. Sclera is white with subtle veins.
7. CLOTHING/ACCESSORIES: Fabric drapes under gravity. Seams are continuous. Buttons/zippers are physically connected. Jewelry doesn't float.

Count fingers explicitly. Note any impossible joint angles. Check ear symmetry precisely."""
103
 
104
# User prompt for the anatomy group; "contains_people" lets the caller
# neutralise the score when no people are visible.
USR_ANATOMY = """Perform a thorough anatomical forensic analysis of this image.

Analyze each of these 7 categories:
1. HAND ANATOMY — Count fingers on each visible hand. Check joint angles, nail placement, proportions.
2. FACIAL SYMMETRY — Check eye alignment, ear symmetry, nose/mouth centering, teeth.
3. BODY PROPORTIONS — Check limb ratios, joint positions, head-to-body ratio.
4. SKIN & TEXTURE — Check pore consistency, wrinkle patterns, texture continuity.
5. HAIR — Check growth direction, hairline, strand connectivity.
6. EYE DETAILS — Check catchlights, iris consistency, sclera, eyelash direction.
7. CLOTHING PHYSICS — Check fabric draping, seam continuity, accessory placement.

If NO people are visible, set contains_people=false.

Respond in JSON:
{
"contains_people": true/false,
"hands_correct": true/false/null,
"finger_count": "e.g. 'Left: 5, Right: 5' or 'Left: 6 (extra pinky)'",
"face_symmetric": true/false/null,
"proportions_ok": true/false/null,
"skin_natural": true/false/null,
"hair_natural": true/false/null,
"eyes_consistent": true/false/null,
"clothing_ok": true/false/null,
"anomalies": ["specific anatomical errors"],
"confidence": 0.0-1.0,
"verdict": "AUTHENTIC" or "SUSPICIOUS" or "MANIPULATED",
"explanation": "detailed reasoning with specific observations"
}"""
133
 
134
# System prompt for the physical-plausibility group (8 domains, one VLM call).
SYS_PHYSICS = """You are a forensic physicist specializing in physical plausibility analysis. Generative AI learns visual patterns but does NOT understand physics. Your job is to find violations.

Your analysis domains:
1. MATERIAL BRDF: Metals are specular and reflect environment. Glass refracts and distorts background. Matte surfaces have diffuse reflection only. Wet surfaces have higher specularity. The same material must have consistent reflectance properties.
2. PERSPECTIVE GEOMETRY: All parallel lines in 3D converge to the same vanishing point. Vertical lines should remain vertical (unless tilted camera). Objects at the same distance should have the same scale.
3. GRAVITY & MECHANICS: Objects rest on surfaces, not float. Liquids are level. Fabric drapes downward. Hair falls with gravity (unless in motion). Structures must be load-bearing.
4. SCALE CONSISTENCY: Known objects (people, cars, doors, furniture) have known sizes. Check relative proportions.
5. TRANSPARENCY & REFRACTION: Glass distorts what's behind it. Water refracts objects below the surface. Transparency should be consistent with material thickness.
6. CONTACT & INTERACTION: Objects touching surfaces have contact shadows. Weight deforms soft surfaces. Reflections on surfaces show correct geometry.
7. MOTION CONSISTENCY: If motion blur is present, it should be consistent with object velocity and direction. Frozen motion should show physically plausible pose.
8. DEPTH ORDERING: Objects closer should occlude objects farther. No impossible overlaps."""
145
+
146
# User prompt for the physical-plausibility group; asks for a JSON reply.
USR_PHYSICS = """Analyze this image for violations of physical laws across 8 domains:
1. Material BRDF consistency — are surface reflections physically correct?
2. Perspective geometry — do parallel lines converge correctly?
3. Gravity and mechanics — do objects obey gravity?
4. Scale consistency — are objects proportional?
5. Transparency/refraction — do transparent objects distort correctly?
6. Contact and interaction — correct shadows and deformation?
7. Motion consistency — is blur/motion physically plausible?
8. Depth ordering — correct occlusion?

Respond in JSON:
{
"brdf_consistent": true/false,
"perspective_correct": true/false,
"gravity_ok": true/false,
"scale_consistent": true/false,
"transparency_ok": true/false/null,
"contact_correct": true/false,
"motion_ok": true/false/null,
"depth_ordering_ok": true/false,
"anomalies": ["specific physics violations"],
"confidence": 0.0-1.0,
"verdict": "AUTHENTIC" or "SUSPICIOUS" or "MANIPULATED",
"explanation": "detailed reasoning"
}"""
171
 
172
# System prompt for the contextual-plausibility call (5 domains).
SYS_CONTEXT = """You are a forensic scene analyst who evaluates whether an image's content is contextually plausible. AI-generated images often combine elements that shouldn't coexist.

Your analysis:
1. TEMPORAL CONSISTENCY: Season (foliage, clothing), time of day (sky, shadows, lighting), era (technology, fashion).
2. GEOGRAPHIC CONSISTENCY: Architecture style matches vegetation. Road markings match country. Signs are in expected language.
3. WEATHER CONSISTENCY: Sky matches ground conditions. Wet ground → overcast or recent rain. Snow → cold-weather attire.
4. SOCIAL PLAUSIBILITY: People's attire matches setting. Group interactions are natural. No impossible crowd configurations.
5. OBJECT RELATIONSHIPS: Furniture is functional. Appliances are connected. Tools are held correctly."""
 
 
 
 
 
 
 
 
 
180
 
181
# User prompt for the contextual-plausibility call; asks for a JSON reply.
USR_CONTEXT = """Analyze contextual plausibility across 5 domains:
1. Temporal — season, time of day, era consistency
2. Geographic — architecture, vegetation, signage consistency
3. Weather — sky vs ground conditions
4. Social — attire, interactions, crowd plausibility
5. Object relationships — functional arrangement

Respond in JSON:
{
"temporal_consistent": true/false,
"geographic_consistent": true/false,
"weather_consistent": true/false,
"social_plausible": true/false,
"objects_functional": true/false,
"anomalies": ["specific contextual violations"],
"confidence": 0.0-1.0,
"verdict": "AUTHENTIC" or "SUSPICIOUS" or "MANIPULATED",
"explanation": "reasoning"
}"""
200
 
201
def run_semantic_agent(img):
    """Run all semantic VLM checks on an image and aggregate the evidence.

    Three prompt groups (lighting, anatomy, physics) and one contextual
    plausibility prompt are each sent to the VLM. Every group's verdict score
    is spread evenly over its features. The three main groups also contribute
    their full group score to the aggregate; the context group contributes
    only its five per-feature shares.

    Args:
        img: PIL image to analyse.

    Returns:
        ``AgentEvidence`` built positionally from: agent name, mean score
        clipped to [-1, 1], confidence (scaled by 0.3 if any VLM call was
        unavailable), failure probability (0.8 if any call was unavailable,
        else 0.0), rationale text, and the findings that carry no "parent"
        key (group summaries, errors, and "VLM unavailable" entries).
    """
    findings, scores = [], []
    vlm_ok = True

    def _explanation(parsed):
        # The model may return null or a non-string here; slicing that would
        # raise after some findings were already appended.
        raw = parsed.get("explanation")
        return "" if raw is None else str(raw)

    groups = [
        (SYS_LIGHTING, USR_LIGHTING, "Lighting Physics",
         ["Shadow Convergence", "Inverse Square Law", "Specular Consistency",
          "Ambient Occlusion", "Color Temperature", "Subsurface Scattering",
          "Caustics", "Inter-reflections"]),
        (SYS_ANATOMY, USR_ANATOMY, "Anatomical Analysis",
         ["Hand Anatomy", "Facial Symmetry", "Body Proportions", "Skin Texture",
          "Hair", "Eye Details", "Clothing Physics"]),
        (SYS_PHYSICS, USR_PHYSICS, "Physical Plausibility",
         ["Material BRDF", "Perspective Geometry", "Gravity", "Scale",
          "Transparency", "Contact", "Motion", "Depth Ordering"]),
    ]

    for sys_p, usr_p, name, features in groups:
        try:
            resp = _vlm(img, sys_p, usr_p)
            if not resp or resp.startswith("VLM_ERROR"):
                vlm_ok = False
                for feat in features:
                    findings.append({"test": feat, "score": 0.0,
                                     "note": "VLM unavailable", "vlm_error": True})
                    scores.append(0.0)
                continue

            parsed = _parse(resp)
            sc = _score(parsed)
            # Anatomy verdicts are meaningless when nobody is in the frame.
            if name == "Anatomical Analysis" and not parsed.get("contains_people", True):
                sc = 0.0
            note = _explanation(parsed)
            anomalies = parsed.get("anomalies", [])
            share = sc / len(features)

            # Per-feature entries carry "parent" so they feed the aggregate
            # but are hidden from the rationale and the returned findings.
            for feat in features:
                findings.append({"test": feat, "score": share,
                                 "note": note[:100], "parent": name})
                scores.append(share)
            findings.append({"test": name, "vlm_analysis": parsed, "anomalies": anomalies,
                             "score": sc, "confidence": parsed.get("confidence", 0.5),
                             "note": note[:200]})
            scores.append(sc)
        except Exception as e:
            findings.append({"test": name, "error": str(e), "score": 0})

    # Context plausibility (separate call)
    context_name = "Contextual Plausibility"
    context_features = ["Temporal", "Geographic", "Weather", "Social", "Object Relations"]
    try:
        resp = _vlm(img, SYS_CONTEXT, USR_CONTEXT)
        if resp and not resp.startswith("VLM_ERROR"):
            parsed = _parse(resp)
            sc = _score(parsed)
            note = _explanation(parsed)
            share = sc / 5
            for feat in context_features:
                findings.append({"test": feat + " Plausibility", "score": share,
                                 "note": note[:100], "parent": context_name})
                scores.append(share)
            # One summary entry so the context verdict can cross the
            # violation/compliance thresholds and its explanation appears in
            # the rationale once instead of five times. It is deliberately
            # not added to `scores`, so the aggregate weighting is unchanged.
            findings.append({"test": context_name, "vlm_analysis": parsed,
                             "anomalies": parsed.get("anomalies", []),
                             "score": sc, "confidence": parsed.get("confidence", 0.5),
                             "note": note[:200]})
        else:
            vlm_ok = False
    except Exception as e:
        # Record the failure instead of silently discarding it.
        findings.append({"test": context_name, "error": str(e), "score": 0})

    avg = float(np.mean(scores)) if scores else 0.0
    conf = min(1.0, 0.4 + 0.5 * abs(avg))
    if not vlm_ok:
        conf *= 0.3

    top_level = [f for f in findings if "parent" not in f]
    viol = [f["test"] for f in top_level if f.get("score", 0) > 0.15]
    comp = [f["test"] for f in top_level if f.get("score", 0) < -0.1]

    if viol:
        rat = f"Semantic violations: {', '.join(viol[:5])}."
    elif comp:
        rat = f"Semantically consistent: {', '.join(comp[:5])}."
    else:
        rat = "Semantic inconclusive."
    for f in top_level:
        if f.get("note"):
            rat += f" [{f['test']}]: {f['note'][:100]}."

    return AgentEvidence("Semantic Consistency Agent", np.clip(avg, -1, 1), conf,
                         0.0 if vlm_ok else 0.8, rat, top_level)