Spaces:

anky2002
/

FORENSIQ

Running

App Files Files Community

anky2002 committed 14 days ago

Commit

25b4b0d

verified ·

1 Parent(s): 27f7870

Upload agents/text_agent.py with huggingface_hub

Browse files

Files changed (1) hide show

agents/text_agent.py +171 -0

agents/text_agent.py ADDED Viewed

	@@ -0,0 +1,171 @@

+"""
+FORENSIQ — Text & Typography Agent (VLM-powered)
+Specialized for text detection in images:
+  - Text legibility (OCR for gibberish detection)
+  - Typography consistency (font, kerning, stroke width)
+  - Sign/label plausibility
+"""
+import os
+import numpy as np
+from PIL import Image
+from typing import Dict, Any
+from agents.optical_agent import AgentEvidence
+from agents.semantic_agent import _call_vlm, _parse_vlm_json
+# ─── Text Legibility & Typography: system/user prompts passed to _call_vlm by analyze_text ───
+TEXT_SYSTEM_PROMPT = """You are an expert typographic forensic analyst. AI-generated images frequently produce text that is visually plausible but linguistically or typographically impossible.
+Your expertise covers:
+- Character formation: letters should have consistent stroke width, proper serifs/sans-serif style
+- Spelling and language: text should form real words in identifiable languages
+- Kerning and spacing: letter spacing should be typographically correct
+- Font consistency: all text in a sign/label should use consistent fonts
+- Text perspective: text on surfaces should follow perspective geometry
+- Sign plausibility: signs should contain meaningful, contextually appropriate text
+- Reflection/shadow text: reflected or shadowed text should be geometrically consistent
+Common AI failures: gibberish text, mixed scripts, impossible letter forms, inconsistent fonts within a word, text that doesn't follow surface geometry, misspelled common words."""
+TEXT_USER_PROMPT = """Examine this image for any visible text (signs, labels, clothing, screens, documents, etc.).
+For each text element found, analyze:
+1. Is the text readable and does it form real words?
+2. Is the spelling correct?
+3. Is the font consistent within each text element?
+4. Does the text follow the surface geometry correctly?
+5. Is the kerning/spacing natural?
+6. Is the text contextually appropriate for the scene?
+If NO text is visible, report that.
+Respond in JSON format:
+{
+    "text_found": true/false,
+    "text_elements": [
+        {
+            "content": "what the text says (or 'GIBBERISH' if unreadable)",
+            "location": "where in the image",
+            "readable": true/false,
+            "spelling_correct": true/false,
+            "font_consistent": true/false,
+            "perspective_correct": true/false
+        }
+    ],
+    "anomalies": ["list of text anomalies found"],
+    "confidence": 0.0-1.0,
+    "verdict": "AUTHENTIC" or "SUSPICIOUS" or "MANIPULATED" or "NO_TEXT",
+    "explanation": "detailed reasoning"
+}"""  # analyze_text() reads these keys from the parsed reply; "content", "location" and "perspective_correct" are requested but not used in its scoring
+def analyze_text(img: Image.Image) -> Dict[str, Any]:
+    """Analyze text legibility and typography via VLM."""
+    response = _call_vlm(img, TEXT_SYSTEM_PROMPT, TEXT_USER_PROMPT)
+    if response and not response.startswith("VLM_ERROR"):
+        parsed = _parse_vlm_json(response)
+        if not parsed.get("text_found", False) or parsed.get("verdict") == "NO_TEXT":
+            return {
+                "test": "Text & Typography",
+                "score": 0.0,
+                "note": "No text visible in image — text analysis not applicable",
+                "vlm_analysis": parsed,
+                "text_found": False,
+            }
+        verdict = parsed.get("verdict", "UNKNOWN")
+        anomalies = parsed.get("anomalies", [])
+        text_elements = parsed.get("text_elements", [])
+        # Count problematic elements
+        n_elements = len(text_elements)
+        n_gibberish = sum(1 for t in text_elements if not t.get("readable", True))
+        n_misspelled = sum(1 for t in text_elements if not t.get("spelling_correct", True))
+        n_bad_font = sum(1 for t in text_elements if not t.get("font_consistent", True))
+        if verdict == "MANIPULATED" or (n_gibberish > 0):
+            score = 0.8
+        elif verdict == "SUSPICIOUS" or n_misspelled > 0 or n_bad_font > 0:
+            score = 0.4
+        elif verdict == "AUTHENTIC":
+            score = -0.4
+        else:
+            score = 0.0
+        return {
+            "test": "Text & Typography",
+            "vlm_analysis": parsed,
+            "text_found": True,
+            "text_elements": text_elements,
+            "anomalies": anomalies,
+            "n_elements": n_elements,
+            "n_gibberish": n_gibberish,
+            "n_misspelled": n_misspelled,
+            "score": score,
+            "confidence": parsed.get("confidence", 0.5),
+            "note": parsed.get("explanation", response[:200]),
+        }
+    else:
+        return {
+            "test": "Text & Typography",
+            "score": 0.0,
+            "note": f"VLM unavailable: {response or 'no HF_TOKEN'}",
+            "vlm_error": True,
+            "text_found": False,
+        }
+# ─── Main Agent Entry Point ─────────────────────────────────────────
+def run_text_agent(img: Image.Image) -> AgentEvidence:
+    """Run text and typography analysis."""
+    findings = []
+    scores = []
+    vlm_available = True
+    try:
+        result = analyze_text(img)
+        findings.append(result)
+        scores.append(result["score"])
+        if result.get("vlm_error"):
+            vlm_available = False
+    except Exception as e:
+        findings.append({"test": "Text & Typography", "error": str(e), "score": 0})
+    avg_score = float(np.mean(scores)) if scores else 0.0
+    confidence = min(1.0, 0.4 + 0.5 * abs(avg_score))
+    if not vlm_available:
+        confidence *= 0.3
+    # Check if text was found at all
+    text_found = any(f.get("text_found", False) for f in findings)
+    if not text_found and vlm_available:
+        rationale = "No text visible in image. Text agent not applicable."
+        confidence = 0.1
+    elif not vlm_available:
+        rationale = "VLM service unavailable. Text analysis skipped."
+    else:
+        violations = [f["test"] for f in findings if f.get("score", 0) > 0.2]
+        if violations:
+            rationale = f"Text anomalies detected."
+        else:
+            rationale = f"Text appears legitimate and consistent."
+        for f in findings:
+            if f.get("note"):
+                rationale += f" {f['note'][:200]}"
+    return AgentEvidence(
+        agent_name="Text & Typography Agent",
+        violation_score=np.clip(avg_score, -1, 1),
+        confidence=confidence,
+        failure_prob=0.0 if vlm_available else 0.9,
+        rationale=rationale,
+        sub_findings=findings,
+    )