Spaces:
Sleeping
Sleeping
Commit ·
a8f498e
1
Parent(s): 82c787b
Error Debugged
Browse files
app.py
CHANGED
|
@@ -91,72 +91,45 @@ def run_demo_episode(task_level: str = "medium", seed: int = 42):
|
|
| 91 |
grader.reset()
|
| 92 |
|
| 93 |
# Run episode
|
| 94 |
-
|
| 95 |
total_reward = 0.0
|
| 96 |
steps = 0
|
| 97 |
max_steps = 200 # Limit for demo
|
| 98 |
|
| 99 |
-
prev_position = env.position.copy()
|
| 100 |
-
optimal_distance = np.linalg.norm(env.target_position - env.position)
|
| 101 |
-
grader.episode_data['optimal_distance'] = optimal_distance
|
| 102 |
-
|
| 103 |
for step in range(max_steps):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
# Random action for demo (in real use, this would be your agent)
|
| 105 |
action = env.action_space.sample()
|
| 106 |
|
| 107 |
# Take step
|
| 108 |
obs, reward, terminated, truncated, info = env.step(action)
|
| 109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
# Update grader
|
| 111 |
-
|
| 112 |
-
distance_delta = np.linalg.norm(current_position - prev_position)
|
| 113 |
-
|
| 114 |
-
grader.update(
|
| 115 |
-
steps=1,
|
| 116 |
-
distance_traveled=distance_delta,
|
| 117 |
-
energy_consumed=np.sum(np.abs(action)) * 0.5,
|
| 118 |
-
)
|
| 119 |
-
|
| 120 |
-
# Check collisions
|
| 121 |
-
if hasattr(env, 'check_collision') and env.check_collision():
|
| 122 |
-
grader.update(collisions=1)
|
| 123 |
-
|
| 124 |
-
# Track wind deviation
|
| 125 |
-
if env.config.wind_disturbance and hasattr(env, 'wind_deviation'):
|
| 126 |
-
grader.update(max_wind_deviation=max(
|
| 127 |
-
grader.episode_data['max_wind_deviation'],
|
| 128 |
-
env.wind_deviation
|
| 129 |
-
))
|
| 130 |
|
| 131 |
-
prev_position = current_position.copy()
|
| 132 |
total_reward += reward
|
| 133 |
steps += 1
|
| 134 |
|
| 135 |
-
# Render frame
|
| 136 |
-
if render_mode == "rgb_array":
|
| 137 |
-
try:
|
| 138 |
-
frame = env.render()
|
| 139 |
-
if frame is not None:
|
| 140 |
-
frames.append(frame)
|
| 141 |
-
except Exception as e:
|
| 142 |
-
print(f"Rendering error (non-fatal): {e}")
|
| 143 |
-
# Continue without rendering
|
| 144 |
-
pass
|
| 145 |
-
|
| 146 |
# Check termination
|
| 147 |
if terminated or truncated:
|
| 148 |
break
|
| 149 |
|
| 150 |
-
# Final updates
|
| 151 |
-
final_distance = np.linalg.norm(env.position - env.target_position)
|
| 152 |
-
target_radius = getattr(env, 'target_radius', 5.0)
|
| 153 |
-
|
| 154 |
-
grader.update(
|
| 155 |
-
target_reached=final_distance < target_radius,
|
| 156 |
-
final_distance_to_target=final_distance,
|
| 157 |
-
time_to_complete=steps,
|
| 158 |
-
)
|
| 159 |
-
|
| 160 |
# Get grade report
|
| 161 |
grade_report = grader.get_grade_report()
|
| 162 |
|
|
@@ -165,9 +138,9 @@ def run_demo_episode(task_level: str = "medium", seed: int = 42):
|
|
| 165 |
**Episode Statistics:**
|
| 166 |
- Steps: {steps}
|
| 167 |
- Total Reward: {total_reward:.2f}
|
| 168 |
-
-
|
| 169 |
-
-
|
| 170 |
-
-
|
| 171 |
""".strip()
|
| 172 |
|
| 173 |
# Generate grade text
|
|
@@ -187,15 +160,7 @@ def run_demo_episode(task_level: str = "medium", seed: int = 42):
|
|
| 187 |
|
| 188 |
env.close()
|
| 189 |
|
| 190 |
-
|
| 191 |
-
if len(frames) > 0:
|
| 192 |
-
# Use middle frame as representative
|
| 193 |
-
screenshot = frames[len(frames) // 2]
|
| 194 |
-
else:
|
| 195 |
-
# Create placeholder
|
| 196 |
-
screenshot = np.zeros((768, 1024, 3), dtype=np.uint8)
|
| 197 |
-
|
| 198 |
-
return screenshot, metrics_text, grade_text
|
| 199 |
|
| 200 |
|
| 201 |
def compare_all_levels(seed: int = 42):
|
|
@@ -228,21 +193,14 @@ def compare_all_levels(seed: int = 42):
|
|
| 228 |
# Run episode
|
| 229 |
done = False
|
| 230 |
steps = 0
|
|
|
|
| 231 |
while not done and steps < 300:
|
| 232 |
action = env.action_space.sample()
|
| 233 |
obs, reward, terminated, truncated, info = env.step(action)
|
| 234 |
-
|
| 235 |
-
grader_instance.update(steps=1)
|
| 236 |
done = terminated or truncated
|
| 237 |
steps += 1
|
| 238 |
|
| 239 |
-
# Final evaluation
|
| 240 |
-
final_distance = np.linalg.norm(env.position - env.target_position)
|
| 241 |
-
grader_instance.update(
|
| 242 |
-
target_reached=final_distance < 5.0,
|
| 243 |
-
final_distance_to_target=final_distance,
|
| 244 |
-
)
|
| 245 |
-
|
| 246 |
grade_report = grader_instance.get_grade_report()
|
| 247 |
|
| 248 |
results.append({
|
|
@@ -282,10 +240,9 @@ def create_demo():
|
|
| 282 |
with gr.Column(scale=3):
|
| 283 |
gr.Markdown("### 📺 Environment View")
|
| 284 |
|
| 285 |
-
|
| 286 |
-
label="
|
| 287 |
-
|
| 288 |
-
height=500,
|
| 289 |
)
|
| 290 |
|
| 291 |
with gr.Row():
|
|
@@ -309,7 +266,7 @@ def create_demo():
|
|
| 309 |
run_button.click(
|
| 310 |
fn=run_demo_episode,
|
| 311 |
inputs=[task_level_dropdown, seed_slider],
|
| 312 |
-
outputs=[
|
| 313 |
)
|
| 314 |
|
| 315 |
compare_button.click(
|
|
@@ -322,16 +279,16 @@ def create_demo():
|
|
| 322 |
demo.load(
|
| 323 |
fn=run_demo_episode,
|
| 324 |
inputs=[task_level_dropdown, seed_slider],
|
| 325 |
-
outputs=[
|
| 326 |
)
|
| 327 |
|
| 328 |
gr.Markdown("""
|
| 329 |
---
|
| 330 |
-
**About:** This is a production-ready RL environment for training
|
| 331 |
|
| 332 |
-
**Task:**
|
| 333 |
|
| 334 |
-
**Scoring:** Agents are graded on
|
| 335 |
|
| 336 |
[View on GitHub](https://github.com/yourusername/OpenEnv) | [Documentation](https://github.com/yourusername/OpenEnv#readme)
|
| 337 |
""")
|
|
|
|
| 91 |
grader.reset()
|
| 92 |
|
| 93 |
# Run episode
|
| 94 |
+
history = []
|
| 95 |
total_reward = 0.0
|
| 96 |
steps = 0
|
| 97 |
max_steps = 200 # Limit for demo
|
| 98 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
for step in range(max_steps):
|
| 100 |
+
current_idx = env.current_email_index
|
| 101 |
+
if current_idx < len(env.emails_queue):
|
| 102 |
+
email = env.emails_queue[current_idx]
|
| 103 |
+
sender = email.sender
|
| 104 |
+
subject = email.subject
|
| 105 |
+
else:
|
| 106 |
+
break
|
| 107 |
+
|
| 108 |
# Random action for demo (in real use, this would be your agent)
|
| 109 |
action = env.action_space.sample()
|
| 110 |
|
| 111 |
# Take step
|
| 112 |
obs, reward, terminated, truncated, info = env.step(action)
|
| 113 |
|
| 114 |
+
action_map = {0: "Ignore", 1: "Reply", 2: "Forward", 3: "Archive", 4: "Delete"}
|
| 115 |
+
history.append([
|
| 116 |
+
sender,
|
| 117 |
+
subject,
|
| 118 |
+
action_map.get(action, str(action)),
|
| 119 |
+
f"{reward:.1f}",
|
| 120 |
+
"Yes" if info.get('last_reward', -1) > 0 else "No"
|
| 121 |
+
])
|
| 122 |
+
|
| 123 |
# Update grader
|
| 124 |
+
grader.update(**info)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
|
|
|
|
| 126 |
total_reward += reward
|
| 127 |
steps += 1
|
| 128 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
# Check termination
|
| 130 |
if terminated or truncated:
|
| 131 |
break
|
| 132 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
# Get grade report
|
| 134 |
grade_report = grader.get_grade_report()
|
| 135 |
|
|
|
|
| 138 |
**Episode Statistics:**
|
| 139 |
- Steps: {steps}
|
| 140 |
- Total Reward: {total_reward:.2f}
|
| 141 |
+
- Correct Actions: {info.get('correct_actions', 0)}
|
| 142 |
+
- Incorrect Actions: {info.get('incorrect_actions', 0)}
|
| 143 |
+
- Critical Failures: {info.get('critical_failures', 0)}
|
| 144 |
""".strip()
|
| 145 |
|
| 146 |
# Generate grade text
|
|
|
|
| 160 |
|
| 161 |
env.close()
|
| 162 |
|
| 163 |
+
return history, metrics_text, grade_text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
|
| 165 |
|
| 166 |
def compare_all_levels(seed: int = 42):
|
|
|
|
| 193 |
# Run episode
|
| 194 |
done = False
|
| 195 |
steps = 0
|
| 196 |
+
info = {}
|
| 197 |
while not done and steps < 300:
|
| 198 |
action = env.action_space.sample()
|
| 199 |
obs, reward, terminated, truncated, info = env.step(action)
|
| 200 |
+
grader_instance.update(**info)
|
|
|
|
| 201 |
done = terminated or truncated
|
| 202 |
steps += 1
|
| 203 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
grade_report = grader_instance.get_grade_report()
|
| 205 |
|
| 206 |
results.append({
|
|
|
|
| 240 |
with gr.Column(scale=3):
|
| 241 |
gr.Markdown("### 📺 Environment View")
|
| 242 |
|
| 243 |
+
output_view = gr.Dataframe(
|
| 244 |
+
label="Inbox Triage History",
|
| 245 |
+
headers=["Sender", "Subject", "Action Taken", "Reward", "Correct?"],
|
|
|
|
| 246 |
)
|
| 247 |
|
| 248 |
with gr.Row():
|
|
|
|
| 266 |
run_button.click(
|
| 267 |
fn=run_demo_episode,
|
| 268 |
inputs=[task_level_dropdown, seed_slider],
|
| 269 |
+
outputs=[output_view, metrics_view, grade_output],
|
| 270 |
)
|
| 271 |
|
| 272 |
compare_button.click(
|
|
|
|
| 279 |
demo.load(
|
| 280 |
fn=run_demo_episode,
|
| 281 |
inputs=[task_level_dropdown, seed_slider],
|
| 282 |
+
outputs=[output_view, metrics_view, grade_output],
|
| 283 |
)
|
| 284 |
|
| 285 |
gr.Markdown("""
|
| 286 |
---
|
| 287 |
+
**About:** This is a production-ready RL environment for training email triage agents.
|
| 288 |
|
| 289 |
+
**Task:** Accurately classify emails. 0=Ignore, 1=Reply, 2=Forward, 3=Archive, 4=Delete.
|
| 290 |
|
| 291 |
+
**Scoring:** Agents are graded on accuracy and critical safety (e.g. not deleting urgent emails).
|
| 292 |
|
| 293 |
[View on GitHub](https://github.com/yourusername/OpenEnv) | [Documentation](https://github.com/yourusername/OpenEnv#readme)
|
| 294 |
""")
|