mahammadaftab committed
Commit a8f498e · 1 Parent(s): 82c787b

Error Debugged

Files changed (1)
  1. app.py +33 -76
app.py CHANGED
@@ -91,72 +91,45 @@ def run_demo_episode(task_level: str = "medium", seed: int = 42):
     grader.reset()
 
     # Run episode
-    frames = []
+    history = []
     total_reward = 0.0
     steps = 0
     max_steps = 200  # Limit for demo
 
-    prev_position = env.position.copy()
-    optimal_distance = np.linalg.norm(env.target_position - env.position)
-    grader.episode_data['optimal_distance'] = optimal_distance
-
     for step in range(max_steps):
+        current_idx = env.current_email_index
+        if current_idx < len(env.emails_queue):
+            email = env.emails_queue[current_idx]
+            sender = email.sender
+            subject = email.subject
+        else:
+            break
+
         # Random action for demo (in real use, this would be your agent)
         action = env.action_space.sample()
 
         # Take step
         obs, reward, terminated, truncated, info = env.step(action)
 
+        action_map = {0: "Ignore", 1: "Reply", 2: "Forward", 3: "Archive", 4: "Delete"}
+        history.append([
+            sender,
+            subject,
+            action_map.get(action, str(action)),
+            f"{reward:.1f}",
+            "Yes" if info.get('last_reward', -1) > 0 else "No"
+        ])
+
         # Update grader
-        current_position = env.position.copy()
-        distance_delta = np.linalg.norm(current_position - prev_position)
-
-        grader.update(
-            steps=1,
-            distance_traveled=distance_delta,
-            energy_consumed=np.sum(np.abs(action)) * 0.5,
-        )
-
-        # Check collisions
-        if hasattr(env, 'check_collision') and env.check_collision():
-            grader.update(collisions=1)
-
-        # Track wind deviation
-        if env.config.wind_disturbance and hasattr(env, 'wind_deviation'):
-            grader.update(max_wind_deviation=max(
-                grader.episode_data['max_wind_deviation'],
-                env.wind_deviation
-            ))
+        grader.update(**info)
 
-        prev_position = current_position.copy()
         total_reward += reward
         steps += 1
 
-        # Render frame
-        if render_mode == "rgb_array":
-            try:
-                frame = env.render()
-                if frame is not None:
-                    frames.append(frame)
-            except Exception as e:
-                print(f"Rendering error (non-fatal): {e}")
-                # Continue without rendering
-                pass
-
         # Check termination
         if terminated or truncated:
             break
 
-    # Final updates
-    final_distance = np.linalg.norm(env.position - env.target_position)
-    target_radius = getattr(env, 'target_radius', 5.0)
-
-    grader.update(
-        target_reached=final_distance < target_radius,
-        final_distance_to_target=final_distance,
-        time_to_complete=steps,
-    )
-
     # Get grade report
     grade_report = grader.get_grade_report()
 
@@ -165,9 +138,9 @@ def run_demo_episode(task_level: str = "medium", seed: int = 42):
 **Episode Statistics:**
 - Steps: {steps}
 - Total Reward: {total_reward:.2f}
-- Final Distance: {final_distance:.2f}
-- Target Reached: {'Yes ✓' if grade_report['episode_data']['target_reached'] else 'No ✗'}
-- Collisions: {grade_report['episode_data']['collisions']}
+- Correct Actions: {info.get('correct_actions', 0)}
+- Incorrect Actions: {info.get('incorrect_actions', 0)}
+- Critical Failures: {info.get('critical_failures', 0)}
     """.strip()
 
     # Generate grade text
@@ -187,15 +160,7 @@ def run_demo_episode(task_level: str = "medium", seed: int = 42):
 
     env.close()
 
-    # Return last frame (or create composite if multiple frames)
-    if len(frames) > 0:
-        # Use middle frame as representative
-        screenshot = frames[len(frames) // 2]
-    else:
-        # Create placeholder
-        screenshot = np.zeros((768, 1024, 3), dtype=np.uint8)
-
-    return screenshot, metrics_text, grade_text
+    return history, metrics_text, grade_text
 
 
 def compare_all_levels(seed: int = 42):
@@ -228,21 +193,14 @@ def compare_all_levels(seed: int = 42):
         # Run episode
        done = False
        steps = 0
+        info = {}
        while not done and steps < 300:
            action = env.action_space.sample()
            obs, reward, terminated, truncated, info = env.step(action)
-
-            grader_instance.update(steps=1)
+            grader_instance.update(**info)
            done = terminated or truncated
            steps += 1
 
-        # Final evaluation
-        final_distance = np.linalg.norm(env.position - env.target_position)
-        grader_instance.update(
-            target_reached=final_distance < 5.0,
-            final_distance_to_target=final_distance,
-        )
-
        grade_report = grader_instance.get_grade_report()
 
        results.append({
@@ -282,10 +240,9 @@ def create_demo():
             with gr.Column(scale=3):
                gr.Markdown("### 📺 Environment View")
 
-                output_image = gr.Image(
-                    label="Drone Navigation",
-                    type="numpy",
-                    height=500,
+                output_view = gr.Dataframe(
+                    label="Inbox Triage History",
+                    headers=["Sender", "Subject", "Action Taken", "Reward", "Correct?"],
                )
 
        with gr.Row():
@@ -309,7 +266,7 @@ def create_demo():
        run_button.click(
            fn=run_demo_episode,
            inputs=[task_level_dropdown, seed_slider],
-            outputs=[output_image, metrics_view, grade_output],
+            outputs=[output_view, metrics_view, grade_output],
        )
 
        compare_button.click(
@@ -322,16 +279,16 @@ def create_demo():
        demo.load(
            fn=run_demo_episode,
            inputs=[task_level_dropdown, seed_slider],
-            outputs=[output_image, metrics_view, grade_output],
+            outputs=[output_view, metrics_view, grade_output],
        )
 
        gr.Markdown("""
        ---
-        **About:** This is a production-ready RL environment for training autonomous drones.
+        **About:** This is a production-ready RL environment for training email triage agents.
 
-        **Task:** Navigate to the green target while managing velocity and avoiding obstacles.
+        **Task:** Accurately classify emails. 0=Ignore, 1=Reply, 2=Forward, 3=Archive, 4=Delete.
 
-        **Scoring:** Agents are graded on target acquisition, collision avoidance, time efficiency, and energy management.
+        **Scoring:** Agents are graded on accuracy and critical safety (e.g. not deleting urgent emails).
 
        [View on GitHub](https://github.com/yourusername/OpenEnv) | [Documentation](https://github.com/yourusername/OpenEnv#readme)
        """)