O96a committed on
Commit
9f7707b
·
verified ·
1 Parent(s): 0281ac5

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +192 -0
app.py ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ PrefixGuard Demo - Agent Failure Detection from Traces
3
+ Based on: "PrefixGuard: From LLM-Agent Traces to Online Failure-Warning Monitors"
4
+ Paper: https://huggingface.co/papers/2605.06455
5
+ """
6
+
7
+ import gradio as gr
8
+ import numpy as np
9
+ from typing import List, Tuple, Dict
10
+ import json
11
+
12
# Lazy-loaded model state
# _risk_model starts as None and is filled in by the first call to
# get_risk_model(); later calls reuse the stored object.
_risk_model = None
# NOTE(review): _trace_encoder is initialised here but no code visible in this
# file assigns or reads it afterwards -- confirm whether it is still needed.
_trace_encoder = None
15
+
16
def get_risk_model():
    """Return the shared risk-scoring configuration, building it on first use.

    The "model" is a plain dict standing in for a trained PrefixGuard
    checkpoint. It is created once, cached in the module-level
    ``_risk_model``, and the same dict object is returned on every later call.

    Returns:
        dict with three entries:
          * ``'step_weights'``     - NumPy array of five per-position multipliers.
          * ``'failure_keywords'`` - substrings treated as failure signals.
          * ``'success_keywords'`` - substrings treated as success signals.
    """
    global _risk_model

    # Fast path: already built by an earlier call.
    if _risk_model is not None:
        return _risk_model

    # Simulated: in production this would load trained PrefixGuard checkpoint
    position_weights = np.array([0.1, 0.15, 0.2, 0.25, 0.3])  # Later steps matter more
    failure_terms = ['error', 'fail', 'timeout', 'exception', 'invalid', 'denied', 'unable']
    success_terms = ['success', 'completed', 'done', 'result', 'output']

    _risk_model = {
        'step_weights': position_weights,
        'failure_keywords': failure_terms,
        'success_keywords': success_terms,
    }
    return _risk_model
27
+
28
def encode_trace_steps(steps: List[str]) -> np.ndarray:
    """Encode agent trace steps as a 2-D array of hand-crafted features.

    Each step becomes one row of five values, in this order:

      0. 1.0 if the lower-cased step contains any failure keyword, else 0.0
      1. 1.0 if it contains any success keyword, else 0.0
      2. the step's character count divided by 500 (not capped at 1.0)
      3. 1.0 if it mentions 'tool', 'function', 'call' or 'api', else 0.0
      4. 1.0 if it mentions 'observation', 'result' or 'returned', else 0.0

    Matching is a case-insensitive substring test, so 'failed' matches the
    keyword 'fail'.

    Args:
        steps: Trace steps, one string per step.

    Returns:
        Float array of shape ``(len(steps), 5)``. An empty ``steps`` yields
        shape ``(0, 5)`` rather than a 1-D empty array, so callers can always
        index feature columns.
    """
    model = get_risk_model()
    tool_markers = ('tool', 'function', 'call', 'api')
    observation_markers = ('observation', 'result', 'returned')

    def contains_any(text: str, needles) -> bool:
        """Return True when any of ``needles`` occurs as a substring of ``text``."""
        return any(needle in text for needle in needles)

    features = []
    for step in steps:
        step_lower = step.lower()
        # Simple keyword-based features (paper uses learned event abstraction)
        features.append([
            contains_any(step_lower, model['failure_keywords']),
            contains_any(step_lower, model['success_keywords']),
            len(step) / 500,
            contains_any(step_lower, tool_markers),
            contains_any(step_lower, observation_markers),
        ])

    # reshape(-1, 5) keeps the result two-dimensional even when `steps` is empty.
    return np.array(features, dtype=float).reshape(-1, 5)
45
+
46
def compute_prefix_risk(steps: List[str]) -> Tuple[float, List[float]]:
    """Compute a failure-risk score from a partial trace prefix.

    Each step gets its own risk: a 0.3 baseline plus a position-weighted
    difference between failure evidence and success evidence, clamped to
    [0, 1]. The prefix score then blends the highest per-step risk (60%) with
    the risk of the most recent step (40%).

    Args:
        steps: The trace steps observed so far, oldest first.

    Returns:
        ``(prefix_risk, step_risks)`` where ``prefix_risk`` is the blended
        score and ``step_risks`` holds one score per step. All values are
        plain Python floats rounded to 3 decimals. An empty prefix returns
        ``(0.5, [])``.
    """
    if not steps:
        return 0.5, []

    step_weights = get_risk_model()['step_weights']
    last_weight_index = len(step_weights) - 1
    features = encode_trace_steps(steps)

    # NOTE(review): with the position weights currently returned by
    # get_risk_model() (maximum 0.3), a step cannot score above 0.6 unless it
    # is longer than 1000 characters, because the keyword term contributes at
    # most 0.8 * 0.3 on top of the 0.3 baseline. Confirm this calibration is
    # intended.
    step_risks = []
    for i, feat in enumerate(features):
        # Weighted combination (simplified from paper's learned scorer)
        fail_score = feat[0] * 0.8 + feat[2] * 0.1  # failure keyword + length term
        success_score = feat[1] * 0.7  # success keywords

        # Position weight: later steps contribute more; steps past the end of
        # the weight table reuse the final weight.
        pos_weight = step_weights[min(i, last_weight_index)]
        step_risk = (fail_score - success_score * 0.5) * pos_weight

        # float() keeps the list homogeneous: clamping against int literals
        # would otherwise mix Python ints with NumPy scalars.
        step_risks.append(float(max(0.0, min(1.0, 0.3 + step_risk))))

    # Aggregate: max risk seen so far with recency bias. `steps` is non-empty
    # here, so `step_risks` always has at least one entry.
    final_risk = 0.6 * max(step_risks) + 0.4 * step_risks[-1]

    return round(final_risk, 3), [round(r, 3) for r in step_risks]
74
+
75
def analyze_trace(
    trace_text: str,
    *,
    high_risk_threshold: float = 0.7,
    medium_risk_threshold: float = 0.5,
    failure_threshold: float = 0.6,
) -> Dict:
    """Analyze a full agent trace and report how risk evolves prefix by prefix.

    The text is split on newlines; blank lines are dropped and each remaining
    line is stripped and treated as one step. The risk score is then
    recomputed for every prefix ``steps[:1]``, ``steps[:2]``, ...

    Args:
        trace_text: Trace with one step per line. ``None`` is treated like an
            empty string.
        high_risk_threshold: A prefix whose score is strictly above this value
            is labelled high risk; the first such prefix is the early warning.
        medium_risk_threshold: Scores strictly above this value (and not high
            risk) are labelled medium.
        failure_threshold: The trace is predicted to fail when the score of
            the full trace is strictly above this value.

    Returns:
        On success, a dict with keys ``total_steps``, ``final_risk``,
        ``predicted_outcome`` (``"FAILURE"`` or ``"SUCCESS"``),
        ``early_warning_at_step`` (1-based step number or ``None``) and
        ``prefix_analysis`` (one dict per prefix with ``step``,
        ``risk_score``, ``alert`` and ``content_preview``).
        When fewer than two steps are supplied, a dict with the single key
        ``error``.
    """
    raw_text = trace_text or ""
    steps = [line.strip() for line in raw_text.split('\n') if line.strip()]

    if len(steps) < 2:
        return {
            "error": "Please provide at least 2 trace steps (one per line)"
        }

    # Compute risk at each prefix length
    prefix_results = []
    early_warning_step = None
    for step_number, latest_step in enumerate(steps, start=1):
        risk, _ = compute_prefix_risk(steps[:step_number])

        if risk > high_risk_threshold:
            alert = "⚠️ HIGH RISK"
            # Remember only the first prefix that crossed the high-risk line.
            if early_warning_step is None:
                early_warning_step = step_number
        elif risk > medium_risk_threshold:
            alert = "⚡ MEDIUM"
        else:
            alert = "✅ LOW"

        # Previews are capped at 80 characters; longer steps get an ellipsis.
        if len(latest_step) > 80:
            preview = latest_step[:80] + "..."
        else:
            preview = latest_step

        prefix_results.append({
            "step": step_number,
            "risk_score": risk,
            "alert": alert,
            "content_preview": preview,
        })

    final_risk = prefix_results[-1]["risk_score"]
    final_outcome = "FAILURE" if final_risk > failure_threshold else "SUCCESS"

    return {
        "total_steps": len(steps),
        "final_risk": final_risk,
        "predicted_outcome": final_outcome,
        "early_warning_at_step": early_warning_step,
        "prefix_analysis": prefix_results,
    }
111
+
112
def demo_interface():
    """Build the Gradio Blocks interface for the PrefixGuard demo.

    The page has a trace input box, an "Analyze Trace" button and three
    example traces on the left, and a summary, a per-step table and a risk
    progression line on the right. Nothing is launched here.

    Returns:
        The constructed ``gr.Blocks`` object; the caller is responsible for
        calling ``launch()`` on it.
    """

    def process_trace(trace_text):
        """Run the analysis and format it as (summary, table, progression)."""
        result = analyze_trace(trace_text or "")

        if "error" in result:
            return result["error"], "", ""

        # Only claim an early warning when one actually fired.
        warning_step = result['early_warning_at_step']
        if warning_step is None:
            early_warning = "None raised"
        else:
            early_warning = f"Step {warning_step}"

        # Build summary. The figures are a bullet list because Markdown joins
        # consecutive plain lines into a single paragraph.
        summary = "\n".join([
            "## Analysis Results",
            "",
            f"- **Total Steps:** {result['total_steps']}",
            f"- **Final Risk Score:** {result['final_risk']:.3f}",
            f"- **Predicted Outcome:** {result['predicted_outcome']}",
            f"- **Early Warning:** {early_warning}",
            "",
            "### Key Insight",
            "This demonstrates how PrefixGuard predicts failures from partial traces,",
            "enabling intervention before task completion.",
        ])

        # Build step-by-step table
        rows = []
        for r in result['prefix_analysis']:
            # A literal '|' in user text would otherwise start a new table cell.
            preview = r['content_preview'].replace('|', '\\|')
            rows.append(
                f"| {r['step']} | {r['risk_score']:.3f} | {r['alert']} | {preview} |\n"
            )
        table = (
            "| Step | Risk | Alert | Preview |\n|------|------|-------|---------|\n"
            + "".join(rows)
        )

        # Risk progression
        risks = [r['risk_score'] for r in result['prefix_analysis']]
        risk_chart = "Risk progression: " + " → ".join(f"{r:.2f}" for r in risks)

        return summary, table, risk_chart

    intro_markdown = (
        "# 🛡️ PrefixGuard Demo\n"
        "\n"
        "**Agent Failure Detection from Execution Traces**\n"
        "\n"
        'Based on: *"PrefixGuard: From LLM-Agent Traces to Online Failure-Warning Monitors"*'
        " (Huang et al., 2026)\n"
        "\n"
        "Enter agent execution steps (one per line) to see how prefix-based"
        " monitoring predicts failures."
    )
    footer_markdown = (
        "---\n"
        "**Note:** This is a simplified demonstration. The full PrefixGuard paper"
        " achieves 0.900 AUPRC on WebArena\n"
        "using learned event abstractions and finite-state monitors trained on"
        " terminal outcomes."
    )

    # Examples use real newlines so each one loads as a multi-step trace;
    # an escaped "\\n" would arrive as one line and fail the 2-step minimum.
    example_traces = [
        ["Tool: search_web\nObservation: 5 results found\nTool: click_result\nObservation: Page loaded\nTool: extract_data\nObservation: Success: extracted 3 records"],
        ["Tool: api_call\nObservation: Error 500 internal server error\nTool: retry\nObservation: Error timeout\nTool: fallback\nObservation: Unable to complete"],
        ["Step 1: Initializing agent\nStep 2: Planning task execution\nStep 3: Tool call failed with exception\nStep 4: Error propagation detected"],
    ]

    with gr.Blocks(title="PrefixGuard Demo") as demo:
        gr.Markdown(intro_markdown)

        with gr.Row():
            with gr.Column(scale=2):
                trace_input = gr.Textbox(
                    label="Agent Trace Steps (one per line)",
                    placeholder="Step 1: Calling search tool...\nStep 2: Tool returned error...\nStep 3: Retrying with...",
                    lines=10,
                )
                analyze_btn = gr.Button("Analyze Trace", variant="primary")

                # Example traces
                # NOTE(review): placed in the input column; the original
                # nesting could not be recovered from the source -- confirm.
                gr.Examples(
                    examples=example_traces,
                    inputs=[trace_input],
                    label="Example Traces",
                )

            with gr.Column(scale=3):
                summary_out = gr.Markdown(label="Summary")
                table_out = gr.Markdown(label="Step-by-Step Analysis")
                chart_out = gr.Textbox(label="Risk Progression", interactive=False)

        analyze_btn.click(
            fn=process_trace,
            inputs=[trace_input],
            outputs=[summary_out, table_out, chart_out],
        )

        gr.Markdown(footer_markdown)

    return demo
189
+
190
# Script entry point: build the interface and start the Gradio server only
# when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    demo = demo_interface()
    demo.launch()