anky2002 committed on
Commit
cb24386
verified
1 Parent(s): 69446e8

Upload bayesian_engine.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. bayesian_engine.py +33 -16
bayesian_engine.py CHANGED
@@ -132,6 +132,7 @@ def temperature_scaling(prob: float, temperature: float = 1.5) -> float:
132
  def bayesian_synthesis(agent_results: List[AgentEvidence]) -> ForensicVerdict:
133
  """
134
  Main Bayesian evidence synthesis algorithm (Algorithm 1 from paper).
 
135
 
136
  Inputs: List of AgentEvidence from all 7 agents
137
  Output: ForensicVerdict with calibrated posterior probability
@@ -145,6 +146,7 @@ def bayesian_synthesis(agent_results: List[AgentEvidence]) -> ForensicVerdict:
145
  scores = []
146
  agent_indices = []
147
  active_agents = []
 
148
 
149
  for evidence in agent_results:
150
  # Get agent index
@@ -159,10 +161,6 @@ def bayesian_synthesis(agent_results: List[AgentEvidence]) -> ForensicVerdict:
159
  # Adjust reliability by failure probability
160
  effective_reliability = reliability * (1 - evidence.failure_prob)
161
 
162
- # Skip agents with very high failure probability
163
- if evidence.failure_prob > 0.8:
164
- continue
165
-
166
  l_fake, l_real = compute_likelihood(
167
  evidence.violation_score,
168
  evidence.confidence,
@@ -173,6 +171,7 @@ def bayesian_synthesis(agent_results: List[AgentEvidence]) -> ForensicVerdict:
173
  scores.append(evidence.violation_score)
174
  agent_indices.append(idx)
175
  active_agents.append(evidence)
 
176
 
177
  if not likelihoods:
178
  return ForensicVerdict(
@@ -187,13 +186,22 @@ def bayesian_synthesis(agent_results: List[AgentEvidence]) -> ForensicVerdict:
187
  # Step 3: Apply independence correction
188
  corrected = apply_independence_correction(likelihoods, scores, agent_indices)
189
 
190
- # Step 4: Bayesian fusion (Eq. 4 from paper)
 
 
 
 
191
  log_p_fake = np.log(p_fake + 1e-15)
192
  log_p_real = np.log(p_real + 1e-15)
193
 
194
- for l_fake, l_real in corrected:
195
- log_p_fake += np.log(max(l_fake, 1e-15))
196
- log_p_real += np.log(max(l_real, 1e-15))
 
 
 
 
 
197
 
198
  # Normalize in log space for numerical stability
199
  log_max = max(log_p_fake, log_p_real)
@@ -222,16 +230,25 @@ def bayesian_synthesis(agent_results: List[AgentEvidence]) -> ForensicVerdict:
222
  verdict = "AUTHENTIC"
223
  conf_label = "High"
224
 
225
- # Compute confidence based on agreement strength
226
- score_magnitudes = [abs(s) for s in scores]
227
- avg_magnitude = np.mean(score_magnitudes) if score_magnitudes else 0
228
- agreement = np.mean([1 if (s > 0) == (np.mean(scores) > 0) else 0 for s in scores]) if scores else 0
229
- confidence_numeric = min(1.0, avg_magnitude * agreement + 0.2)
 
 
 
 
 
230
 
231
- # Step 7: Extract key evidence
232
  key_evidence = []
233
- sorted_agents = sorted(active_agents, key=lambda a: abs(a.violation_score), reverse=True)
234
- for agent in sorted_agents[:3]:
 
 
 
 
235
  direction = "VIOLATED" if agent.violation_score > 0.1 else "COMPLIANT" if agent.violation_score < -0.1 else "NEUTRAL"
236
  key_evidence.append(
237
  f"{agent.agent_name}: {direction} (score={agent.violation_score:.2f}, "
 
132
  def bayesian_synthesis(agent_results: List[AgentEvidence]) -> ForensicVerdict:
133
  """
134
  Main Bayesian evidence synthesis algorithm (Algorithm 1 from paper).
135
+ Now includes proper failure mode marginalization (Eq. 3).
136
 
137
  Inputs: List of AgentEvidence from all 7 agents
138
  Output: ForensicVerdict with calibrated posterior probability
 
146
  scores = []
147
  agent_indices = []
148
  active_agents = []
149
+ failure_probs = []
150
 
151
  for evidence in agent_results:
152
  # Get agent index
 
161
  # Adjust reliability by failure probability
162
  effective_reliability = reliability * (1 - evidence.failure_prob)
163
 
 
 
 
 
164
  l_fake, l_real = compute_likelihood(
165
  evidence.violation_score,
166
  evidence.confidence,
 
171
  scores.append(evidence.violation_score)
172
  agent_indices.append(idx)
173
  active_agents.append(evidence)
174
+ failure_probs.append(evidence.failure_prob)
175
 
176
  if not likelihoods:
177
  return ForensicVerdict(
 
186
  # Step 3: Apply independence correction
187
  corrected = apply_independence_correction(likelihoods, scores, agent_indices)
188
 
189
+ # Step 4: Failure Mode Marginalization (Eq. 3 from paper)
190
+ # P(Fake|E) = Σ_{F⊆A} [∏_{i∈F} f_i · ∏_{j∉F} (1-f_j)] · P(Fake|E_F)
191
+ # Approximate: instead of 2^N subsets, use weighted combination
192
+ # For each agent, mix its likelihood with uninformative 0.5 based on failure prob
193
+
194
  log_p_fake = np.log(p_fake + 1e-15)
195
  log_p_real = np.log(p_real + 1e-15)
196
 
197
+ for i, (l_fake, l_real) in enumerate(corrected):
198
+ fi = failure_probs[i]
199
+ # Marginalize: (1-fi)*likelihood + fi*0.5 (uninformative if failed)
200
+ l_fake_marg = (1 - fi) * l_fake + fi * 0.5
201
+ l_real_marg = (1 - fi) * l_real + fi * 0.5
202
+
203
+ log_p_fake += np.log(max(l_fake_marg, 1e-15))
204
+ log_p_real += np.log(max(l_real_marg, 1e-15))
205
 
206
  # Normalize in log space for numerical stability
207
  log_max = max(log_p_fake, log_p_real)
 
230
  verdict = "AUTHENTIC"
231
  conf_label = "High"
232
 
233
+ # Compute confidence based on agreement strength and active agent count
234
+ non_failed = [s for s, f in zip(scores, failure_probs) if f < 0.5]
235
+ if non_failed:
236
+ score_magnitudes = [abs(s) for s in non_failed]
237
+ avg_magnitude = float(np.mean(score_magnitudes))
238
+ agreement = float(np.mean([1 if (s > 0) == (np.mean(non_failed) > 0) else 0 for s in non_failed]))
239
+ agent_coverage = len(non_failed) / 7.0
240
+ confidence_numeric = min(1.0, avg_magnitude * agreement * agent_coverage + 0.1)
241
+ else:
242
+ confidence_numeric = 0.1
243
 
244
+ # Step 7: Extract key evidence (top 3 strongest signals from non-failed agents)
245
  key_evidence = []
246
+ sorted_agents = sorted(
247
+ [(a, f) for a, f in zip(active_agents, failure_probs) if f < 0.5],
248
+ key=lambda x: abs(x[0].violation_score),
249
+ reverse=True,
250
+ )
251
+ for agent, fp in sorted_agents[:3]:
252
  direction = "VIOLATED" if agent.violation_score > 0.1 else "COMPLIANT" if agent.violation_score < -0.1 else "NEUTRAL"
253
  key_evidence.append(
254
  f"{agent.agent_name}: {direction} (score={agent.violation_score:.2f}, "