Spaces:

anky2002
/

FORENSIQ

Running

App Files Community

anky2002 committed 14 days ago

Commit

cb24386

verified ·

1 Parent(s): 69446e8

Upload bayesian_engine.py with huggingface_hub

Browse files

Files changed (1) hide show

bayesian_engine.py +33 -16

bayesian_engine.py CHANGED Viewed

@@ -132,6 +132,7 @@ def temperature_scaling(prob: float, temperature: float = 1.5) -> float:
 def bayesian_synthesis(agent_results: List[AgentEvidence]) -> ForensicVerdict:
     """
     Main Bayesian evidence synthesis algorithm (Algorithm 1 from paper).
     Inputs: List of AgentEvidence from all 7 agents
     Output: ForensicVerdict with calibrated posterior probability
@@ -145,6 +146,7 @@ def bayesian_synthesis(agent_results: List[AgentEvidence]) -> ForensicVerdict:
     scores = []
     agent_indices = []
     active_agents = []
     for evidence in agent_results:
         # Get agent index
@@ -159,10 +161,6 @@ def bayesian_synthesis(agent_results: List[AgentEvidence]) -> ForensicVerdict:
         # Adjust reliability by failure probability
         effective_reliability = reliability * (1 - evidence.failure_prob)
-        # Skip agents with very high failure probability
-        if evidence.failure_prob > 0.8:
-            continue
         l_fake, l_real = compute_likelihood(
             evidence.violation_score,
             evidence.confidence,
@@ -173,6 +171,7 @@ def bayesian_synthesis(agent_results: List[AgentEvidence]) -> ForensicVerdict:
         scores.append(evidence.violation_score)
         agent_indices.append(idx)
         active_agents.append(evidence)
     if not likelihoods:
         return ForensicVerdict(
@@ -187,13 +186,22 @@ def bayesian_synthesis(agent_results: List[AgentEvidence]) -> ForensicVerdict:
     # Step 3: Apply independence correction
     corrected = apply_independence_correction(likelihoods, scores, agent_indices)
-    # Step 4: Bayesian fusion (Eq. 4 from paper)
     log_p_fake = np.log(p_fake + 1e-15)
     log_p_real = np.log(p_real + 1e-15)
-    for l_fake, l_real in corrected:
-        log_p_fake += np.log(max(l_fake, 1e-15))
-        log_p_real += np.log(max(l_real, 1e-15))
     # Normalize in log space for numerical stability
     log_max = max(log_p_fake, log_p_real)
@@ -222,16 +230,25 @@ def bayesian_synthesis(agent_results: List[AgentEvidence]) -> ForensicVerdict:
         verdict = "AUTHENTIC"
         conf_label = "High"
-    # Compute confidence based on agreement strength
-    score_magnitudes = [abs(s) for s in scores]
-    avg_magnitude = np.mean(score_magnitudes) if score_magnitudes else 0
-    agreement = np.mean([1 if (s > 0) == (np.mean(scores) > 0) else 0 for s in scores]) if scores else 0
-    confidence_numeric = min(1.0, avg_magnitude * agreement + 0.2)
-    # Step 7: Extract key evidence
     key_evidence = []
-    sorted_agents = sorted(active_agents, key=lambda a: abs(a.violation_score), reverse=True)
-    for agent in sorted_agents[:3]:
         direction = "VIOLATED" if agent.violation_score > 0.1 else "COMPLIANT" if agent.violation_score < -0.1 else "NEUTRAL"
         key_evidence.append(
             f"{agent.agent_name}: {direction} (score={agent.violation_score:.2f}, "

 def bayesian_synthesis(agent_results: List[AgentEvidence]) -> ForensicVerdict:
     """
     Main Bayesian evidence synthesis algorithm (Algorithm 1 from paper).
+    Now includes proper failure mode marginalization (Eq. 3).
     Inputs: List of AgentEvidence from all 7 agents
     Output: ForensicVerdict with calibrated posterior probability
     scores = []
     agent_indices = []
     active_agents = []
+    failure_probs = []
     for evidence in agent_results:
         # Get agent index
         # Adjust reliability by failure probability
         effective_reliability = reliability * (1 - evidence.failure_prob)
         l_fake, l_real = compute_likelihood(
             evidence.violation_score,
             evidence.confidence,
         scores.append(evidence.violation_score)
         agent_indices.append(idx)
         active_agents.append(evidence)
+        failure_probs.append(evidence.failure_prob)
     if not likelihoods:
         return ForensicVerdict(
     # Step 3: Apply independence correction
     corrected = apply_independence_correction(likelihoods, scores, agent_indices)
+    # Step 4: Failure Mode Marginalization (Eq. 3 from paper)
+    # P(Fake|E) = Σ_{F⊆A} [∏_{i∈F} f_i · ∏_{j∉F} (1-f_j)] · P(Fake|E_F)
+    # Approximate: instead of 2^N subsets, use weighted combination
+    # For each agent, mix its likelihood with uninformative 0.5 based on failure prob
     log_p_fake = np.log(p_fake + 1e-15)
     log_p_real = np.log(p_real + 1e-15)
+    for i, (l_fake, l_real) in enumerate(corrected):
+        fi = failure_probs[i]
+        # Marginalize: (1-fi)*likelihood + fi*0.5 (uninformative if failed)
+        l_fake_marg = (1 - fi) * l_fake + fi * 0.5
+        l_real_marg = (1 - fi) * l_real + fi * 0.5
+        log_p_fake += np.log(max(l_fake_marg, 1e-15))
+        log_p_real += np.log(max(l_real_marg, 1e-15))
     # Normalize in log space for numerical stability
     log_max = max(log_p_fake, log_p_real)
         verdict = "AUTHENTIC"
         conf_label = "High"
+    # Compute confidence based on agreement strength and active agent count
+    non_failed = [s for s, f in zip(scores, failure_probs) if f < 0.5]
+    if non_failed:
+        score_magnitudes = [abs(s) for s in non_failed]
+        avg_magnitude = float(np.mean(score_magnitudes))
+        agreement = float(np.mean([1 if (s > 0) == (np.mean(non_failed) > 0) else 0 for s in non_failed]))
+        agent_coverage = len(non_failed) / 7.0
+        confidence_numeric = min(1.0, avg_magnitude * agreement * agent_coverage + 0.1)
+    else:
+        confidence_numeric = 0.1
+    # Step 7: Extract key evidence (top 3 strongest signals from non-failed agents)
     key_evidence = []
+    sorted_agents = sorted(
+        [(a, f) for a, f in zip(active_agents, failure_probs) if f < 0.5],
+        key=lambda x: abs(x[0].violation_score),
+        reverse=True,
+    )
+    for agent, fp in sorted_agents[:3]:
         direction = "VIOLATED" if agent.violation_score > 0.1 else "COMPLIANT" if agent.violation_score < -0.1 else "NEUTRAL"
         key_evidence.append(
             f"{agent.agent_name}: {direction} (score={agent.violation_score:.2f}, "