Spaces:

anky2002
/

FORENSIQ

Running

App Files Files Community

anky2002 committed 14 days ago

Commit

ada1738

verified ·

1 Parent(s): 2f3f5e8

Upload agents/semantic_agent.py with huggingface_hub

Browse files

Files changed (1) hide show

agents/semantic_agent.py +358 -0

agents/semantic_agent.py ADDED Viewed

	@@ -0,0 +1,358 @@

+"""
+FORENSIQ — Semantic Consistency Agent (VLM-powered)
+Uses Qwen2.5-VL via HF Inference to evaluate:
+  - Lighting consistency (shadow convergence, inverse square law)
+  - Material properties (BRDF anomalies, reflectance)
+  - Anatomical errors (finger count, joint angles, facial symmetry)
+  - Physical plausibility (gravity, perspective, scale)
+"""
+import os
+import base64
+import io
+import json
+import re
+import numpy as np
+from PIL import Image
+from typing import Dict, Any, Optional
+from dataclasses import dataclass
+from agents.optical_agent import AgentEvidence
+# ─── VLM Interface ───────────────────────────────────────────────────
def _encode_image_b64(img: Image.Image, max_size: int = 1024) -> str:
    """Encode a PIL image as a base64 JPEG string for API submission.

    Args:
        img: Image to encode.
        max_size: Upper bound, in pixels, for the longer side. Larger images
            are scaled down proportionally before encoding.

    Returns:
        Base64 text (decoded as UTF-8) of the image saved as an RGB JPEG
        at quality 90.
    """
    w, h = img.size
    longest = max(w, h)
    if longest > max_size:
        ratio = max_size / longest
        # int() truncates, so the short side of a very elongated image can
        # collapse to 0, which resize() rejects. Keep at least one pixel.
        new_size = (max(1, int(w * ratio)), max(1, int(h * ratio)))
        img = img.resize(new_size, Image.LANCZOS)
    buf = io.BytesIO()
    # JPEG has no alpha channel / palette mode, hence the RGB conversion.
    img.convert("RGB").save(buf, format="JPEG", quality=90)
    return base64.b64encode(buf.getvalue()).decode("utf-8")
def _call_vlm(img: Image.Image, system_prompt: str, user_prompt: str) -> Optional[str]:
    """Query Qwen2.5-VL-72B-Instruct via the HF router (OpenAI-compatible endpoint).

    The image is sent inline as a base64 JPEG data URL together with the
    system and user prompts.

    Args:
        img: Image to analyze.
        system_prompt: Text for the "system" message.
        user_prompt: Text placed after the image in the "user" message.

    Returns:
        The model's reply text on success.
        None when the HF_TOKEN environment variable is unset or empty.
        A string starting with "VLM_ERROR: " when the openai package is not
        installed, the request raises, or the reply contains no text.
    """
    try:
        from openai import OpenAI
    except ImportError:
        # Reported as an error string (not None) so callers do not
        # misattribute the failure to a missing HF_TOKEN.
        return "VLM_ERROR: openai package is not installed"
    token = os.environ.get("HF_TOKEN", "")
    if not token:
        return None
    try:
        client = OpenAI(
            base_url="https://router.huggingface.co/v1",
            api_key=token,
        )
        b64 = _encode_image_b64(img)
        response = client.chat.completions.create(
            model="Qwen/Qwen2.5-VL-72B-Instruct",
            messages=[
                {"role": "system", "content": system_prompt},
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:image/jpeg;base64,{b64}"},
                        },
                        {"type": "text", "text": user_prompt},
                    ],
                },
            ],
            max_tokens=1500,
            temperature=0.1,
        )
        content = response.choices[0].message.content
    except Exception as e:
        # Best-effort boundary: any client/network/encoding failure is
        # surfaced to the caller as a tagged string rather than raised.
        return f"VLM_ERROR: {e}"
    if not content:
        # A None/empty reply would otherwise look like "no HF_TOKEN".
        return "VLM_ERROR: empty response from model"
    return content
+def _parse_vlm_json(text: str) -> Dict[str, Any]:
+    """Extract JSON from VLM response (handles markdown code blocks)."""
+    if text is None:
+        return {}
+    # Try to find JSON block
+    json_match = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', text, re.DOTALL)
+    if json_match:
+        try:
+            return json.loads(json_match.group(1))
+        except json.JSONDecodeError:
+            pass
+    # Try direct parse
+    try:
+        return json.loads(text)
+    except json.JSONDecodeError:
+        pass
+    # Try to find any {...} block
+    brace_match = re.search(r'\{[^{}]*\}', text, re.DOTALL)
+    if brace_match:
+        try:
+            return json.loads(brace_match.group(0))
+        except json.JSONDecodeError:
+            pass
+    return {"raw_response": text}
+# ─── Lighting Consistency ────────────────────────────────────────────
# System prompt: frames the model as a lighting-physics forensic analyst.
LIGHTING_SYSTEM_PROMPT = """You are an expert forensic image analyst specializing in lighting physics and photogrammetry. Your task is to analyze images for lighting consistency violations that indicate AI generation or manipulation.
You understand:
- Shadow direction convergence (all shadows must trace back to consistent light source positions)
- Inverse square law (light intensity falls off as 1/r²)
- Specular highlight placement (must be consistent with light source direction)
- Ambient vs direct lighting ratios
- Multiple light source scenarios
- Reflection consistency in eyes, glasses, and shiny surfaces
Be precise, clinical, and evidence-based. Cite specific image regions when noting anomalies."""
# User prompt: lists the checks and the JSON schema the reply should follow.
LIGHTING_USER_PROMPT = """Analyze this image for lighting consistency. Examine:
1. Shadow directions — do all shadows point to consistent light source(s)?
2. Shadow softness — is it consistent with the apparent light source distance?
3. Specular highlights — are reflections in eyes, skin, and objects consistent?
4. Light falloff — does brightness decrease naturally with distance from light?
5. Ambient lighting — is the ambient-to-direct ratio physically plausible?
Respond in JSON format:
{
    "lighting_consistent": true/false,
    "shadow_direction_consistent": true/false,
    "specular_highlights_consistent": true/false,
    "light_falloff_natural": true/false,
    "anomalies": ["list of specific anomalies found, empty if none"],
    "confidence": 0.0-1.0,
    "verdict": "AUTHENTIC" or "SUSPICIOUS" or "MANIPULATED",
    "explanation": "detailed reasoning"
}"""
def analyze_lighting(img: Image.Image) -> Dict[str, Any]:
    """Ask the VLM whether the image's lighting is physically consistent.

    Args:
        img: Image to analyze.

    Returns:
        A finding dict. On success it holds "test", "vlm_analysis" (parsed
        reply), "anomalies" (list), "score" (0.7 MANIPULATED, 0.4 SUSPICIOUS,
        -0.4 AUTHENTIC, 0.0 otherwise), "confidence" (float in [0, 1]) and
        "note" (str). When the VLM call yields nothing or a "VLM_ERROR"
        string, it holds "test", "score" 0.0, "note" and "vlm_error" True.
    """
    response = _call_vlm(img, LIGHTING_SYSTEM_PROMPT, LIGHTING_USER_PROMPT)
    if not response or response.startswith("VLM_ERROR"):
        return {
            "test": "Lighting Consistency",
            "score": 0.0,
            "note": f"VLM unavailable: {response or 'no HF_TOKEN'}",
            "vlm_error": True,
        }

    parsed = _parse_vlm_json(response)
    if not isinstance(parsed, dict):
        # A reply that is valid JSON but not an object carries no fields.
        parsed = {"raw_response": response}

    # Model output is free text: tolerate case/whitespace drift in the verdict.
    verdict = str(parsed.get("verdict", "UNKNOWN")).strip().upper()
    score = {"MANIPULATED": 0.7, "SUSPICIOUS": 0.4, "AUTHENTIC": -0.4}.get(verdict, 0.0)

    anomalies = parsed.get("anomalies") or []
    if not isinstance(anomalies, list):
        anomalies = [anomalies]

    # "confidence" may arrive as a string, null, or out of range.
    try:
        confidence = min(1.0, max(0.0, float(parsed.get("confidence", 0.5))))
    except (TypeError, ValueError):
        confidence = 0.5

    # Always hand back a string so consumers can slice the note safely.
    explanation = parsed.get("explanation")
    note = str(explanation) if explanation else response[:200]

    return {
        "test": "Lighting Consistency",
        "vlm_analysis": parsed,
        "anomalies": anomalies,
        "score": score,
        "confidence": confidence,
        "note": note,
    }
+# ─── Anatomical Analysis ────────────────────────────────────────────
# System prompt: frames the model as a human-anatomy forensic analyst.
ANATOMY_SYSTEM_PROMPT = """You are an expert forensic analyst specializing in human anatomy verification in images. AI-generated images frequently contain anatomical errors that are physically impossible.
You have encyclopedic knowledge of:
- Hand anatomy: finger count (exactly 5 per hand), joint bending directions, nail placement, proportions
- Facial anatomy: bilateral symmetry, ear alignment, eye spacing, teeth regularity
- Body proportions: limb ratios, joint angles, skeletal plausibility
- Skin texture: pore consistency, wrinkle patterns, hair follicle distribution
- Clothing physics: fabric draping, seam continuity, button alignment
AI-generated images commonly fail on: extra/missing fingers, impossible joint angles, asymmetric ears, teeth anomalies, melted/merged body parts, clothing that defies physics."""
# User prompt: lists the checks and the JSON schema the reply should follow.
ANATOMY_USER_PROMPT = """Carefully examine this image for anatomical correctness. Check:
1. Hands: Count fingers on each visible hand. Check joint angles and proportions.
2. Face: Check bilateral symmetry, ear alignment, eye consistency, teeth.
3. Body: Check limb proportions, joint angles, body part connections.
4. Skin/Hair: Check texture consistency, pore patterns, hairline.
5. Clothing: Check seam continuity, fabric physics, accessory consistency.
Respond in JSON format:
{
    "contains_people": true/false,
    "finger_count_correct": true/false/null,
    "facial_symmetry_ok": true/false/null,
    "body_proportions_ok": true/false/null,
    "skin_texture_natural": true/false/null,
    "clothing_physics_ok": true/false/null,
    "anomalies": ["list of specific anatomical errors found"],
    "confidence": 0.0-1.0,
    "verdict": "AUTHENTIC" or "SUSPICIOUS" or "MANIPULATED",
    "explanation": "detailed reasoning with specific observations"
}"""
def analyze_anatomy(img: Image.Image) -> Dict[str, Any]:
    """Ask the VLM to check people in the image for anatomical errors.

    Args:
        img: Image to analyze.

    Returns:
        A finding dict. On success it holds "test", "vlm_analysis" (parsed
        reply), "anomalies" (list), "score" (0.8 MANIPULATED, 0.4 SUSPICIOUS,
        -0.4 AUTHENTIC, 0.0 otherwise), "confidence" (float in [0, 1]) and
        "note" (str). When the reply reports a falsy "contains_people", it
        holds "test", "score" 0.0, a not-applicable "note" and
        "vlm_analysis". When the VLM call yields nothing or a "VLM_ERROR"
        string, it holds "test", "score" 0.0, "note" and "vlm_error" True.
    """
    response = _call_vlm(img, ANATOMY_SYSTEM_PROMPT, ANATOMY_USER_PROMPT)
    if not response or response.startswith("VLM_ERROR"):
        return {
            "test": "Anatomical Analysis",
            "score": 0.0,
            "note": f"VLM unavailable: {response or 'no HF_TOKEN'}",
            "vlm_error": True,
        }

    parsed = _parse_vlm_json(response)
    if not isinstance(parsed, dict):
        # A reply that is valid JSON but not an object carries no fields.
        parsed = {"raw_response": response}

    # A missing key defaults to True so the anatomy verdict is still scored.
    if not parsed.get("contains_people", True):
        return {
            "test": "Anatomical Analysis",
            "score": 0.0,
            "note": "No people detected in image — anatomical analysis not applicable",
            "vlm_analysis": parsed,
        }

    # Model output is free text: tolerate case/whitespace drift in the verdict.
    verdict = str(parsed.get("verdict", "UNKNOWN")).strip().upper()
    score = {"MANIPULATED": 0.8, "SUSPICIOUS": 0.4, "AUTHENTIC": -0.4}.get(verdict, 0.0)

    anomalies = parsed.get("anomalies") or []
    if not isinstance(anomalies, list):
        anomalies = [anomalies]

    # "confidence" may arrive as a string, null, or out of range.
    try:
        confidence = min(1.0, max(0.0, float(parsed.get("confidence", 0.5))))
    except (TypeError, ValueError):
        confidence = 0.5

    # Always hand back a string so consumers can slice the note safely.
    explanation = parsed.get("explanation")
    note = str(explanation) if explanation else response[:200]

    return {
        "test": "Anatomical Analysis",
        "vlm_analysis": parsed,
        "anomalies": anomalies,
        "score": score,
        "confidence": confidence,
        "note": note,
    }
+# ─── Material / Physics Plausibility ────────────────────────────────
# System prompt: frames the model as a forensic physicist.
PHYSICS_SYSTEM_PROMPT = """You are an expert forensic physicist who analyzes images for violations of physical laws. AI-generated images often violate basic physics because generative models learn visual patterns without understanding underlying physics.
Your expertise covers:
- Material reflectance: metals should reflect surroundings, glass should refract, matte surfaces shouldn't have specular highlights
- BRDF consistency: bidirectional reflectance should be consistent across the same material
- Gravity and structural physics: objects should rest on surfaces, liquids should be level, structures should be load-bearing
- Perspective geometry: parallel lines should converge to consistent vanishing points
- Scale consistency: known objects should be proportional to each other
- Transparency/refraction: glass, water, and transparent objects should distort backgrounds correctly"""
# User prompt: lists the checks and the JSON schema the reply should follow.
PHYSICS_USER_PROMPT = """Analyze this image for physical plausibility violations:
1. Material properties: Are reflections, textures, and surface properties physically correct?
2. Perspective: Do parallel lines converge to consistent vanishing points?
3. Scale: Are objects proportional to each other and known references?
4. Gravity: Do objects rest naturally? Are liquids level? Do fabrics drape correctly?
5. Transparency: Do glass, water, or transparent objects refract/distort correctly?
Respond in JSON format:
{
    "materials_consistent": true/false,
    "perspective_correct": true/false,
    "scale_consistent": true/false,
    "gravity_plausible": true/false,
    "anomalies": ["list of specific physics violations"],
    "confidence": 0.0-1.0,
    "verdict": "AUTHENTIC" or "SUSPICIOUS" or "MANIPULATED",
    "explanation": "detailed reasoning"
}"""
def analyze_physics(img: Image.Image) -> Dict[str, Any]:
    """Ask the VLM whether the image violates material or physical plausibility.

    Args:
        img: Image to analyze.

    Returns:
        A finding dict. On success it holds "test", "vlm_analysis" (parsed
        reply), "anomalies" (list), "score" (0.6 MANIPULATED, 0.3 SUSPICIOUS,
        -0.4 AUTHENTIC, 0.0 otherwise), "confidence" (float in [0, 1]) and
        "note" (str). When the VLM call yields nothing or a "VLM_ERROR"
        string, it holds "test", "score" 0.0, "note" and "vlm_error" True.
    """
    response = _call_vlm(img, PHYSICS_SYSTEM_PROMPT, PHYSICS_USER_PROMPT)
    if not response or response.startswith("VLM_ERROR"):
        return {
            "test": "Physical Plausibility",
            "score": 0.0,
            "note": f"VLM unavailable: {response or 'no HF_TOKEN'}",
            "vlm_error": True,
        }

    parsed = _parse_vlm_json(response)
    if not isinstance(parsed, dict):
        # A reply that is valid JSON but not an object carries no fields.
        parsed = {"raw_response": response}

    # Model output is free text: tolerate case/whitespace drift in the verdict.
    verdict = str(parsed.get("verdict", "UNKNOWN")).strip().upper()
    score = {"MANIPULATED": 0.6, "SUSPICIOUS": 0.3, "AUTHENTIC": -0.4}.get(verdict, 0.0)

    anomalies = parsed.get("anomalies") or []
    if not isinstance(anomalies, list):
        anomalies = [anomalies]

    # "confidence" may arrive as a string, null, or out of range.
    try:
        confidence = min(1.0, max(0.0, float(parsed.get("confidence", 0.5))))
    except (TypeError, ValueError):
        confidence = 0.5

    # Always hand back a string so consumers can slice the note safely.
    explanation = parsed.get("explanation")
    note = str(explanation) if explanation else response[:200]

    return {
        "test": "Physical Plausibility",
        "vlm_analysis": parsed,
        "anomalies": anomalies,
        "score": score,
        "confidence": confidence,
        "note": note,
    }
+# ─── Main Agent Entry Point ─────────────────────────────────────────
def run_semantic_agent(img: Image.Image) -> AgentEvidence:
    """Run all semantic consistency tests via VLM and aggregate them.

    Runs the lighting, anatomy and physics analyzers in sequence, averages
    their scores, and builds a textual rationale from their notes.

    Args:
        img: Image to analyze.

    Returns:
        AgentEvidence whose violation_score is the mean sub-test score
        clipped to [-1, 1]. Confidence is 0.4 + 0.5 * |mean| capped at 1.0
        and scaled by 0.3 when any sub-test reported a VLM error or raised;
        failure_prob is 0.8 in that degraded case and 0.0 otherwise.
    """
    findings = []
    scores = []
    vlm_available = True
    for fn in (analyze_lighting, analyze_anatomy, analyze_physics):
        try:
            result = fn(img)
            score = result["score"]
        except Exception as e:
            # Best-effort: one failing analyzer must not abort the others,
            # but the run is degraded and must not be reported as healthy.
            findings.append({"test": fn.__name__, "error": str(e), "score": 0})
            vlm_available = False
            continue
        findings.append(result)
        scores.append(score)
        if result.get("vlm_error"):
            vlm_available = False

    avg_score = float(np.mean(scores)) if scores else 0.0
    confidence = min(1.0, 0.4 + 0.5 * abs(avg_score))
    if not vlm_available:
        confidence *= 0.3  # Low confidence without VLM

    violations = [f["test"] for f in findings if f.get("score", 0) > 0.2]
    compliant = [f["test"] for f in findings if f.get("score", 0) < -0.1]
    if violations:
        rationale = f"Semantic violations detected: {', '.join(violations)}."
    elif compliant:
        rationale = f"Semantic consistency confirmed: {', '.join(compliant)}."
    else:
        rationale = "Semantic analysis inconclusive."

    for f in findings:
        note = f.get("note")
        if note:
            # str() guards the slice: a note taken from model output may
            # not be a string.
            rationale += f" [{f['test']}]: {str(note)[:150]}."

    return AgentEvidence(
        agent_name="Semantic Consistency Agent",
        # Plain float rather than a NumPy scalar.
        violation_score=float(np.clip(avg_score, -1, 1)),
        confidence=confidence,
        failure_prob=0.0 if vlm_available else 0.8,
        rationale=rationale,
        sub_findings=findings,
    )