Commit: Upload agents/metadata_agent.py with huggingface_hub
Changed file: agents/metadata_agent.py (+156 lines, −2 lines)
|
@@ -241,13 +241,167 @@ def analyze_ai_metadata(img: Image.Image) -> Dict[str, Any]:
|
|
| 241 |
}
|
| 242 |
|
| 243 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 244 |
# βββ Main Agent Entry Point βββββββββββββββββββββββββββββββββββββββββ
|
| 245 |
def run_metadata_agent(img: Image.Image) -> AgentEvidence:
|
| 246 |
"""Run all metadata analysis tests."""
|
| 247 |
findings = []
|
| 248 |
scores = []
|
| 249 |
|
| 250 |
-
for fn in [analyze_exif, analyze_ela, analyze_ai_metadata
|
|
|
|
| 251 |
try:
|
| 252 |
result = fn(img)
|
| 253 |
findings.append(result)
|
|
@@ -283,7 +437,7 @@ def run_metadata_agent(img: Image.Image) -> AgentEvidence:
|
|
| 283 |
agent_name="Metadata Agent",
|
| 284 |
violation_score=np.clip(avg_score, -1, 1),
|
| 285 |
confidence=confidence,
|
| 286 |
-
failure_prob=max(0.0, 1.0 - len(scores) /
|
| 287 |
rationale=rationale,
|
| 288 |
sub_findings=findings,
|
| 289 |
visual_evidence=ela_img,
|
|
|
|
| 241 |
}
|
| 242 |
|
| 243 |
|
| 244 |
+
# ─── Thumbnail Consistency ───────────────────────────────────────────
def analyze_thumbnail_consistency(img: Image.Image) -> Dict[str, Any]:
    """
    JPEG files often embed a thumbnail. If the main image was manipulated
    but the thumbnail wasn't updated, they'll differ.

    Currently this only detects the *presence* of an embedded thumbnail
    (via the raw JPEG APP segments or the EXIF JPEGInterchangeFormat
    tags). Presence is weak evidence of real camera output (score -0.1);
    absence is neutral (score 0.0).

    Returns a dict with keys "test", "score", "note" (plus
    "has_thumbnail" when one is found).
    """
    # _getexif() is a private, JPEG-only PIL API; any failure (non-JPEG
    # image, corrupt EXIF) is treated as "no EXIF data".
    try:
        exif_data = img._getexif() or {}
    except Exception:
        exif_data = {}

    # Scan the raw JPEG APP segments for a thumbnail marker.
    has_thumbnail = False
    try:
        if hasattr(img, 'applist'):
            for app in img.applist:
                # Fixed: original used a hard-to-read conditional
                # expression as the if-condition
                # (`X in Y if isinstance(...) else False`); the
                # `isinstance(...) and ...` form is equivalent.
                if isinstance(app[1], bytes) and b'thumbnail' in app[1].lower():
                    has_thumbnail = True
                    break  # one hit is enough
    except Exception:
        pass

    # Check EXIF thumbnail tags (513 = JPEGInterchangeFormat,
    # 514 = JPEGInterchangeFormatLength).
    if 513 in exif_data or 514 in exif_data:
        has_thumbnail = True

    if not has_thumbnail:
        return {
            "test": "Thumbnail Consistency",
            "score": 0.0,
            "note": "No embedded thumbnail found for comparison",
        }

    return {
        "test": "Thumbnail Consistency",
        "has_thumbnail": True,
        "score": -0.1,
        "note": "Embedded thumbnail present (consistent with real camera output)",
    }
|
| 282 |
+
|
| 283 |
+
|
| 284 |
+
# ─── Watermark Detection ─────────────────────────────────────────────
def analyze_watermarks(img: Image.Image) -> Dict[str, Any]:
    """
    Detect invisible watermarks (frequency domain) and visible watermarks.
    Some AI generators embed identifying watermarks.

    Two signals are checked:
      * isolated bright peaks in the centered 2-D FFT log-magnitude
        spectrum (possible carrier frequencies of an embedded watermark);
      * C2PA / Content Credentials markers in the image's ``info`` dict.

    Returns a dict with keys "test", "isolated_peaks", "c2pa_found",
    "score", "note".
    """
    gray = np.array(img.convert("L")).astype(np.float64)

    # Log-magnitude spectrum, shifted so the DC term sits at the center.
    fft = np.fft.fft2(gray)
    fft_shift = np.fft.fftshift(fft)
    magnitude = np.log(np.abs(fft_shift) + 1)

    h, w = magnitude.shape
    cy, cx = h // 2, w // 2

    # Zero out the DC neighbourhood so it can't dominate the peak search.
    magnitude_clean = magnitude.copy()
    magnitude_clean[cy - 3:cy + 3, cx - 3:cx + 3] = 0

    # Isolated bright spots (potential watermark carriers): a pixel that
    # equals its 10x10-neighbourhood maximum AND lies in the top 0.5% of
    # magnitudes.
    from scipy.ndimage import maximum_filter
    local_max = maximum_filter(magnitude_clean, size=10)
    peaks = (magnitude_clean == local_max) & (magnitude_clean > np.percentile(magnitude_clean, 99.5))
    n_isolated_peaks = int(np.sum(peaks))

    # Check image info for C2PA / Content Credentials provenance markers.
    info = img.info or {}
    c2pa_found = False
    for key in info:
        key_str = str(key).lower()
        val_str = str(info[key])[:500].lower()  # cap value size before scanning
        if any(marker in key_str or marker in val_str
               for marker in ["c2pa", "contentcredentials", "content_authenticity"]):
            c2pa_found = True
            break  # fix: stop scanning once found (original kept iterating)

    if c2pa_found:
        score = 0.0
        note = "C2PA Content Credentials watermark detected (provenance tracking)"
    elif n_isolated_peaks > 20:
        score = 0.2
        note = f"Suspicious frequency-domain peaks ({n_isolated_peaks}, possible embedded watermark)"
    else:
        score = 0.0
        note = f"No watermark signatures detected ({n_isolated_peaks} peaks)"

    return {
        "test": "Watermark Detection",
        "isolated_peaks": n_isolated_peaks,
        "c2pa_found": c2pa_found,
        "score": score,
        "note": note,
    }
|
| 337 |
+
|
| 338 |
+
|
| 339 |
+
# ─── Compression Ghost Detection ─────────────────────────────────────
def analyze_compression_ghosts(img: Image.Image) -> Dict[str, Any]:
    """
    Double JPEG compression leaves 'ghosts' — periodic artifacts at
    block boundaries that differ from single compression.
    Detect by comparing pixel discontinuities at 8x8 block boundaries
    against discontinuities inside blocks ("blockiness" ratio).

    Returns a dict with keys "test", "blockiness_ratio", "score", "note".
    """
    gray = np.array(img.convert("L")).astype(np.float64)
    h, w = gray.shape
    # Crop to a whole number of 8x8 JPEG blocks.
    h_crop, w_crop = (h // 8) * 8, (w // 8) * 8
    gray = gray[:h_crop, :w_crop]

    # Vectorized replacement for the original per-row/per-column Python
    # loops (which built multi-million-element lists via .tolist()).
    # row_diffs[k, :] == |gray[k+1, :] - gray[k, :]|, i.e. the diff the
    # original computed for loop index i = k + 1; same for columns.
    row_diffs = np.abs(np.diff(gray, axis=0))          # (h_crop-1, w_crop)
    col_diffs = np.abs(np.diff(gray, axis=1))          # (h_crop, w_crop-1)

    # Boundary rows/cols are those where the original loop index i (or j),
    # running from 1, satisfied i % 8 == 0.
    row_is_boundary = (np.arange(1, h_crop) % 8) == 0
    col_is_boundary = (np.arange(1, w_crop) % 8) == 0

    boundary_vals = np.concatenate([
        row_diffs[row_is_boundary].ravel(),
        col_diffs[:, col_is_boundary].ravel(),
    ])
    interior_vals = np.concatenate([
        row_diffs[~row_is_boundary].ravel(),
        col_diffs[:, ~col_is_boundary].ravel(),
    ])

    if boundary_vals.size and interior_vals.size:
        boundary_mean = float(np.mean(boundary_vals))
        interior_mean = float(np.mean(interior_vals))
        blockiness = boundary_mean / (interior_mean + 1e-9)  # epsilon avoids /0
    else:
        # Image smaller than one block row/column: no evidence either way.
        blockiness = 1.0

    # Blockiness > 1.2 suggests JPEG compression; > 1.5 suggests double compression
    if blockiness > 1.5:
        score = 0.3
        note = f"Strong block boundary artifacts (blockiness={blockiness:.3f}, possible double JPEG)"
    elif blockiness > 1.2:
        score = -0.1
        note = f"Normal JPEG blockiness ({blockiness:.3f})"
    elif blockiness < 1.02:
        score = 0.1
        note = f"No block boundaries (blockiness={blockiness:.3f}, non-JPEG or AI)"
    else:
        score = 0.0
        note = f"Mild blockiness ({blockiness:.3f})"

    return {
        "test": "Compression Ghost Detection",
        "blockiness_ratio": round(blockiness, 4),
        "score": score,
        "note": note,
    }
|
| 395 |
+
|
| 396 |
+
|
| 397 |
# βββ Main Agent Entry Point βββββββββββββββββββββββββββββββββββββββββ
|
| 398 |
def run_metadata_agent(img: Image.Image) -> AgentEvidence:
|
| 399 |
"""Run all metadata analysis tests."""
|
| 400 |
findings = []
|
| 401 |
scores = []
|
| 402 |
|
| 403 |
+
for fn in [analyze_exif, analyze_ela, analyze_ai_metadata,
|
| 404 |
+
analyze_thumbnail_consistency, analyze_watermarks, analyze_compression_ghosts]:
|
| 405 |
try:
|
| 406 |
result = fn(img)
|
| 407 |
findings.append(result)
|
|
|
|
| 437 |
agent_name="Metadata Agent",
|
| 438 |
violation_score=np.clip(avg_score, -1, 1),
|
| 439 |
confidence=confidence,
|
| 440 |
+
failure_prob=max(0.0, 1.0 - len(scores) / 6),
|
| 441 |
rationale=rationale,
|
| 442 |
sub_findings=findings,
|
| 443 |
visual_evidence=ela_img,
|