Spaces:

anky2002
/

FORENSIQ

Running

App Files Files Community

anky2002 committed 14 days ago

Commit

27f7870

verified ·

1 Parent(s): ada1738

Upload agents/metadata_agent.py with huggingface_hub

Browse files

Files changed (1) hide show

agents/metadata_agent.py +290 -0

agents/metadata_agent.py ADDED Viewed

	@@ -0,0 +1,290 @@

+"""
+FORENSIQ — Metadata Agent
+Analyzes file metadata and compression:
+  - EXIF validation (completeness and physical plausibility)
+  - Compression history (Error Level Analysis for double JPEG)
+  - AI metadata traces (XMP/IPTC parser for generator signatures)
+"""
+import numpy as np
+from PIL import Image, ImageChops, ImageEnhance
+from PIL.ExifTags import TAGS, GPSTAGS
+import io
+import struct
+from typing import Dict, Any, List, Tuple
+from agents.optical_agent import AgentEvidence
+# ─── EXIF Validation ─────────────────────────────────────────────────
+def analyze_exif(img: Image.Image) -> Dict[str, Any]:
+    """
+    Check EXIF metadata completeness and physical plausibility.
+    Real photos have rich EXIF; AI images have none or fabricated metadata.
+    """
+    try:
+        exif_data = img._getexif() or {}
+    except Exception:
+        exif_data = {}
+    decoded = {}
+    for tag_id, value in exif_data.items():
+        tag = TAGS.get(tag_id, str(tag_id))
+        try:
+            decoded[tag] = str(value)[:200]
+        except Exception:
+            decoded[tag] = "<binary>"
+    suspicious_flags = []
+    authenticity_markers = []
+    # Camera info
+    has_make = "Make" in decoded
+    has_model = "Model" in decoded
+    has_lens = "LensModel" in decoded or "LensInfo" in decoded
+    has_focal = "FocalLength" in decoded
+    has_exposure = "ExposureTime" in decoded
+    has_iso = "ISOSpeedRatings" in decoded
+    has_aperture = "FNumber" in decoded
+    has_datetime = "DateTime" in decoded or "DateTimeOriginal" in decoded
+    has_gps = "GPSInfo" in decoded
+    has_software = "Software" in decoded
+    camera_fields = sum([has_make, has_model, has_lens, has_focal,
+                         has_exposure, has_iso, has_aperture])
+    if camera_fields == 0:
+        suspicious_flags.append("No camera metadata (stripped or AI-generated)")
+    elif camera_fields >= 4:
+        authenticity_markers.append(f"Rich camera metadata ({camera_fields}/7 fields)")
+    if not decoded:
+        suspicious_flags.append("Completely empty EXIF (strong AI indicator)")
+    if has_software:
+        sw = decoded.get("Software", "").lower()
+        ai_keywords = ["stable diffusion", "midjourney", "dall-e", "comfyui",
+                        "automatic1111", "invoke", "flux", "sd", "novelai"]
+        edit_keywords = ["photoshop", "gimp", "lightroom", "capture one"]
+        if any(k in sw for k in ai_keywords):
+            suspicious_flags.append(f"AI generation software: {decoded['Software']}")
+        elif any(k in sw for k in edit_keywords):
+            suspicious_flags.append(f"Editing software detected: {decoded['Software']}")
+        else:
+            authenticity_markers.append(f"Software: {decoded['Software']}")
+    if has_datetime:
+        authenticity_markers.append("Timestamp present")
+    if has_gps:
+        authenticity_markers.append("GPS coordinates present (strong authenticity marker)")
+    # Physical plausibility checks
+    if has_focal and has_aperture:
+        try:
+            focal = float(str(decoded.get("FocalLength", "0")).split("/")[0])
+            fnumber = float(str(decoded.get("FNumber", "0")).split("/")[0])
+            if focal > 0 and fnumber > 0:
+                # Check if aperture is physically possible for focal length
+                if fnumber < 0.7 or fnumber > 64:
+                    suspicious_flags.append(f"Impossible aperture: f/{fnumber}")
+                else:
+                    authenticity_markers.append(f"Plausible optics: {focal}mm f/{fnumber}")
+        except Exception:
+            pass
+    # Score
+    n_suspicious = len(suspicious_flags)
+    n_authentic = len(authenticity_markers)
+    if n_suspicious == 0 and n_authentic >= 3:
+        score = -0.5
+        note = "Rich, plausible EXIF metadata (strong authenticity)"
+    elif n_suspicious >= 2 or (not decoded):
+        score = 0.5
+        note = "Missing or suspicious metadata"
+    elif n_suspicious == 1:
+        score = 0.2
+        note = "Minor metadata concern"
+    else:
+        score = -0.1
+        note = "Partial metadata present"
+    return {
+        "test": "EXIF Validation",
+        "total_fields": len(decoded),
+        "camera_fields": camera_fields,
+        "suspicious_flags": suspicious_flags,
+        "authenticity_markers": authenticity_markers,
+        "exif_data": decoded,
+        "score": score,
+        "note": note,
+    }
+# ─── Error Level Analysis (ELA) ─────────────────────────────────────
+def analyze_ela(img: Image.Image, quality: int = 90) -> Dict[str, Any]:
+    """
+    Re-save at known JPEG quality and compute pixel-level differences.
+    Manipulated regions show different error levels than unmodified areas.
+    Also detects double JPEG compression.
+    """
+    # Resave at target quality
+    buf = io.BytesIO()
+    img_rgb = img.convert("RGB")
+    img_rgb.save(buf, "JPEG", quality=quality)
+    buf.seek(0)
+    resaved = Image.open(buf).convert("RGB")
+    # Pixel difference
+    ela_img = ImageChops.difference(img_rgb, resaved)
+    # Scale for visibility
+    extrema = ela_img.getextrema()
+    max_diff = max([e[1] for e in extrema]) or 1
+    scale = 255.0 / max_diff
+    ela_visible = ImageEnhance.Brightness(ela_img).enhance(scale)
+    ela_arr = np.array(ela_img).astype(np.float64)
+    # Global statistics
+    global_mean = float(np.mean(ela_arr))
+    global_std = float(np.std(ela_arr))
+    # Block-level analysis (detect inconsistent compression)
+    block_means = []
+    h, w, _ = ela_arr.shape
+    bs = 32
+    for i in range(0, h - bs, bs):
+        for j in range(0, w - bs, bs):
+            block = ela_arr[i:i + bs, j:j + bs, :]
+            block_means.append(float(np.mean(block)))
+    block_means = np.array(block_means)
+    block_std = float(np.std(block_means))
+    block_range = float(np.max(block_means) - np.min(block_means))
+    # High block variance = inconsistent compression = manipulation
+    if block_std > 8.0 and block_range > 30:
+        score = 0.6
+        note = f"High ELA variance (σ={block_std:.1f}, range={block_range:.1f}) — manipulation regions detected"
+    elif block_std > 4.0:
+        score = 0.3
+        note = f"Moderate ELA variance (σ={block_std:.1f}) — possible manipulation"
+    elif global_std < 1.0:
+        score = 0.2
+        note = "Unusually uniform ELA (possible AI generation with no JPEG history)"
+    else:
+        score = -0.2
+        note = f"Consistent ELA levels (σ={block_std:.1f}, natural compression)"
+    return {
+        "test": "Error Level Analysis",
+        "global_mean": round(global_mean, 4),
+        "global_std": round(global_std, 4),
+        "block_std": round(block_std, 4),
+        "block_range": round(block_range, 4),
+        "score": score,
+        "note": note,
+        "ela_image": ela_visible,
+    }
+# ─── AI Metadata Traces ─────────────────────────────────────────────
+def analyze_ai_metadata(img: Image.Image) -> Dict[str, Any]:
+    """
+    Check for AI generation markers in XMP, IPTC, and other metadata.
+    C2PA, Content Credentials, and generator watermarks.
+    """
+    info = img.info or {}
+    suspicious_flags = []
+    found_traces = []
+    # Check PNG text chunks
+    for key in info:
+        key_lower = str(key).lower()
+        val = str(info[key])[:500]
+        ai_markers = ["stable diffusion", "comfyui", "automatic1111",
+                       "midjourney", "dall-e", "novelai", "invoke",
+                       "parameters", "prompt", "negative_prompt",
+                       "steps", "sampler", "cfg_scale", "model",
+                       "flux", "sd_model", "clip_skip"]
+        if any(m in key_lower or m in val.lower() for m in ai_markers):
+            found_traces.append(f"{key}: {val[:100]}")
+    # Check for XMP data
+    xmp_data = info.get("XML:com.adobe.xmp", "") or info.get("xmp", "")
+    if isinstance(xmp_data, bytes):
+        xmp_data = xmp_data.decode("utf-8", errors="ignore")
+    if "ai:" in xmp_data.lower() or "generativeAI" in xmp_data:
+        found_traces.append("XMP contains AI generation markers")
+    if "c2pa" in xmp_data.lower() or "contentcredentials" in xmp_data.lower():
+        found_traces.append("Content Credentials (C2PA) detected")
+    if found_traces:
+        score = 0.8
+        note = f"AI generation metadata found: {'; '.join(found_traces[:3])}"
+    else:
+        score = 0.0
+        note = "No AI metadata traces detected"
+    return {
+        "test": "AI Metadata Traces",
+        "traces_found": found_traces,
+        "info_keys": list(str(k) for k in info.keys())[:20],
+        "score": score,
+        "note": note,
+    }
+# ─── Main Agent Entry Point ─────────────────────────────────────────
+def run_metadata_agent(img: Image.Image) -> AgentEvidence:
+    """Run all metadata analysis tests."""
+    findings = []
+    scores = []
+    for fn in [analyze_exif, analyze_ela, analyze_ai_metadata]:
+        try:
+            result = fn(img)
+            findings.append(result)
+            scores.append(result["score"])
+        except Exception as e:
+            findings.append({"test": fn.__name__, "error": str(e), "score": 0})
+    avg_score = float(np.mean(scores)) if scores else 0.0
+    confidence = min(1.0, 0.5 + 0.5 * abs(avg_score))
+    violations = [f["test"] for f in findings if f.get("score", 0) > 0.2]
+    compliant = [f["test"] for f in findings if f.get("score", 0) < -0.1]
+    if violations:
+        rationale = f"Metadata violations: {', '.join(violations)}."
+    elif compliant:
+        rationale = f"Metadata consistent: {', '.join(compliant)}."
+    else:
+        rationale = "Metadata analysis inconclusive."
+    for f in findings:
+        if f.get("note"):
+            rationale += f" [{f['test']}]: {f['note']}."
+    # Extract ELA image if available
+    ela_img = None
+    for f in findings:
+        if "ela_image" in f:
+            ela_img = f["ela_image"]
+            del f["ela_image"]  # Don't include in serializable findings
+    return AgentEvidence(
+        agent_name="Metadata Agent",
+        violation_score=np.clip(avg_score, -1, 1),
+        confidence=confidence,
+        failure_prob=max(0.0, 1.0 - len(scores) / 3),
+        rationale=rationale,
+        sub_findings=findings,
+        visual_evidence=ela_img,
+    )