"""
OpenEnv Email Triage - Hugging Face Spaces Demo with FastAPI Router

Interactive web interface for testing the Email Triage environment.
Includes a POST /reset endpoint to satisfy automated validation checks.
"""

import json
import time
import traceback
from pathlib import Path

import gradio as gr
import numpy as np
import uvicorn
from fastapi import FastAPI

from openenv.core.env import OpenEnv
from openenv.core.config import EnvConfig
from openenv.core.grader import create_grader
from openenv.core.models import Action

# Create FastAPI app for the Hackathon validation pings
app = FastAPI()

# Load configuration
CONFIG_PATH = Path("openenv.yaml")

# Human-readable labels for the discrete action ids shown in the history table.
_ACTION_LABELS = {0: "Ignore", 1: "Reply", 2: "Forward", 3: "Archive", 4: "Delete"}

# Step caps that keep the interactive demo responsive.
_DEMO_MAX_STEPS = 200
_COMPARE_MAX_STEPS = 300

# Footer text rendered at the bottom of the page.
_ABOUT_MARKDOWN = """
---
**About:** This is a production-ready RL environment for training email triage agents.
**Task:** Accurately classify emails. 0=Ignore, 1=Reply, 2=Forward, 3=Archive, 4=Delete.
**Scoring:** Agents are graded on accuracy and critical safety (e.g. not deleting urgent emails).

[View on GitHub](https://github.com/yourusername/OpenEnv) | [Documentation](https://github.com/yourusername/OpenEnv#readme)
"""


def load_yaml_config():
    """Load ``openenv.yaml`` and return its parsed content.

    Returns:
        The object produced by ``yaml.safe_load``, or ``None`` when PyYAML is
        not installed, the file cannot be read, or it is not valid YAML.
    """
    try:
        import yaml
    except ImportError:
        # PyYAML is optional; callers fall back to built-in defaults.
        return None

    try:
        with open(CONFIG_PATH, 'r', encoding='utf-8') as f:
            return yaml.safe_load(f)
    except (OSError, UnicodeDecodeError, yaml.YAMLError):
        # Best effort: a missing or malformed file means "no YAML config".
        return None


def _default_task_configs() -> dict:
    """Return a fresh copy of the built-in per-difficulty task settings."""
    return {
        'easy': {
            'config': {
                'num_emails': 10,
                'spam_ratio': 0.3,
                'urgent_ratio': 0.2,
                'confounding_ratio': 0.0,
            },
            'grader': {
                'success_threshold': 0.7,
                'criteria': [
                    {'name': 'accuracy', 'weight': 0.8},
                    {'name': 'critical_safety', 'weight': 0.2},
                ],
            },
        },
        'medium': {
            'config': {
                'num_emails': 20,
                'spam_ratio': 0.3,
                'urgent_ratio': 0.2,
                'confounding_ratio': 0.2,
            },
            'grader': {
                'success_threshold': 0.8,
                'criteria': [
                    {'name': 'accuracy', 'weight': 0.7},
                    {'name': 'critical_safety', 'weight': 0.3},
                ],
            },
        },
        'hard': {
            'config': {
                'num_emails': 50,
                'spam_ratio': 0.4,
                'urgent_ratio': 0.1,
                'confounding_ratio': 0.4,
            },
            'grader': {
                'success_threshold': 0.9,
                'criteria': [
                    {'name': 'accuracy', 'weight': 0.6},
                    {'name': 'critical_safety', 'weight': 0.4},
                ],
            },
        },
    }


def get_task_config(task_level: str) -> dict:
    """Return the ``{'config': ..., 'grader': ...}`` settings for a level.

    The entry from ``openenv.yaml`` is used when the file defines the
    requested level under ``tasks``; otherwise the built-in defaults apply.

    Args:
        task_level: Difficulty name such as 'easy', 'medium' or 'hard'.

    Returns:
        The task settings dict. Levels unknown to both the YAML file and the
        defaults fall back to the built-in 'medium' settings.
    """
    yaml_config = load_yaml_config()
    tasks = yaml_config.get('tasks') if isinstance(yaml_config, dict) else None
    # Only trust the YAML when it actually has this level; a partial file
    # must not turn into a KeyError inside a UI callback.
    if isinstance(tasks, dict) and task_level in tasks:
        return tasks[task_level]

    defaults = _default_task_configs()
    return defaults.get(task_level, defaults['medium'])


# Global environment state for the session
env_instance = None
grader_instance = None


@app.post("/reset")
def rest_api_reset():
    """Acknowledge a POST /reset ping.

    The handler only reports success; it does not touch any environment state.
    """
    return {"status": "success"}


def run_demo_episode(task_level: str = "medium", seed: int = 42):
    """Run a single demo episode with random actions and return the results.

    Args:
        task_level: Difficulty level passed to the environment and grader.
        seed: Seed forwarded to ``env.reset``.

    Returns:
        A ``(history, metrics_text, grade_text)`` tuple: ``history`` is a list
        of ``[sender, subject, action, reward, correct]`` rows, the other two
        are formatted text. If the environment cannot be created, ``history``
        is empty and the two text slots carry the error information.
    """
    # The Gradio slider may deliver the value as a float; normalise it.
    seed = int(seed)
    render_mode = "rgb_array"

    # Get configuration
    task_config = get_task_config(task_level)

    # Create environment
    env_config = EnvConfig(
        **task_config['config'],
        task_level=task_level,
        render_mode=render_mode,
        verbose=False,
    )

    try:
        env = OpenEnv(config=env_config)
    except Exception as e:
        error_msg = f"Failed to create environment: {str(e)}\n\n{traceback.format_exc()}"
        print(error_msg)
        # The first output is a Dataframe, so report "no rows" rather than
        # an image-shaped placeholder array.
        return [], "Error initializing environment", error_msg

    history = []
    total_reward = 0.0
    steps = 0

    try:
        # Create grader
        grader = create_grader(task_level, task_config['grader'])

        # Reset
        obs, info = env.reset(seed=seed)
        grader.reset()

        # Run episode
        for _ in range(_DEMO_MAX_STEPS):
            current_idx = env.current_email_index
            if current_idx >= len(env.emails_queue):
                break

            # Capture the email being triaged before the step advances.
            email = env.emails_queue[current_idx]
            sender = email.sender
            subject = email.subject

            # Random action for demo (in real use, this would be your agent)
            action = env.action_space.sample()

            # Take step
            obs, reward, terminated, truncated, info = env.step(action)

            history.append([
                sender,
                subject,
                _ACTION_LABELS.get(action, str(action)),
                f"{reward:.1f}",
                "Yes" if info.get('last_reward', -1) > 0 else "No",
            ])

            # Update grader
            grader.update(**info)

            total_reward += reward
            steps += 1

            # Check termination
            if terminated or truncated:
                break

        # Get grade report
        grade_report = grader.get_grade_report()
    finally:
        # Always release the environment, even if the episode raised.
        env.close()

    # Generate metrics text
    metrics_text = "\n".join([
        "**Episode Statistics:**",
        f"- Steps: {steps}",
        f"- Total Reward: {total_reward:.2f}",
        f"- Correct Actions: {info.get('correct_actions', 0)}",
        f"- Incorrect Actions: {info.get('incorrect_actions', 0)}",
        f"- Critical Failures: {info.get('critical_failures', 0)}",
    ])

    # Generate grade text
    grade_text = (
        f"\n**Performance Grade: {grade_report['final_score']:.2f} / 1.00**"
        f"\n{grade_report['feedback']}\n"
        "\n**Criteria Scores:**\n"
    )
    grade_text += "".join(
        f"\n- {criterion_name.replace('_', ' ').title()}: {score:.2f}"
        for criterion_name, score in grade_report['criteria_scores'].items()
    )
    grade_text += f"\n\n**Status:** {'✓ PASSED' if grade_report['passed'] else '✗ FAILED'}"
    grade_text += f"\nThreshold: {grade_report['success_threshold']:.2f}"

    return history, metrics_text, grade_text


def compare_all_levels(seed: int = 42):
    """Run one random-action episode per difficulty level and tabulate scores.

    Args:
        seed: Random seed forwarded to each environment reset.

    Returns:
        A Markdown-style table (as a string) with one row per level showing
        the final score, pass/fail mark and number of steps taken.
    """
    # The Gradio slider may deliver the value as a float; normalise it.
    seed = int(seed)
    results = []

    for level in ['easy', 'medium', 'hard']:
        task_config = get_task_config(level)

        env_config = EnvConfig(
            **task_config['config'],
            task_level=level,
            verbose=False,
        )
        env = OpenEnv(config=env_config)

        try:
            grader = create_grader(level, task_config['grader'])

            obs, _ = env.reset(seed=seed)
            grader.reset()

            # Run episode
            done = False
            steps = 0
            while not done and steps < _COMPARE_MAX_STEPS:
                action = env.action_space.sample()
                obs, reward, terminated, truncated, info = env.step(action)
                grader.update(**info)
                done = terminated or truncated
                steps += 1

            grade_report = grader.get_grade_report()
        finally:
            # Always release the environment, even if the episode raised.
            env.close()

        results.append({
            'level': level.upper(),
            'score': grade_report['final_score'],
            'passed': '✓' if grade_report['passed'] else '✗',
            'steps': steps,
        })

    # Create comparison table
    rows = [
        "| Difficulty | Score | Status | Steps |\n",
        "|------------|-------|--------|-------|\n",
    ]
    rows.extend(
        f"| {result['level']:10s} | {result['score']:.2f} | {result['passed']:6s} | {result['steps']:5d} |\n"
        for result in results
    )
    return "".join(rows)


def create_demo():
    """Build the Gradio Blocks UI and wire its event handlers.

    Returns:
        The constructed ``gr.Blocks`` instance.
    """
    with gr.Blocks(title="OpenEnv Email Triage") as demo:
        gr.Markdown("# 📧 OpenEnv: Email Triage")
        gr.Markdown(
            "Real-world task environment for AI agent training. "
            "Classify the inbox accurately and maintain safety limits."
        )

        with gr.Row():
            with gr.Column(scale=1):
                task_level_dropdown = gr.Dropdown(
                    choices=['easy', 'medium', 'hard'],
                    value='medium',
                    label="Difficulty",
                )
                seed_slider = gr.Slider(
                    minimum=0,
                    maximum=1000,
                    value=42,
                    step=1,
                    label="Random Seed",
                )
                # NOTE(review): this button has no click handler attached in
                # this file, so pressing it currently does nothing.
                reset_btn = gr.Button("Initialize Inbox", variant="primary")
                run_button = gr.Button("🚀 Run Episode", variant="primary")
                compare_button = gr.Button("📊 Compare All Levels")

            with gr.Column(scale=3):
                gr.Markdown("### 📺 Environment View")
                output_view = gr.Dataframe(
                    label="Inbox Triage History",
                    headers=["Sender", "Subject", "Action Taken", "Reward", "Correct?"],
                )

        with gr.Row():
            with gr.Column():
                metrics_view = gr.Markdown("### Metrics\nN/A")
            with gr.Column():
                gr.Markdown("### 🎯 Performance Grade")
                grade_output = gr.Textbox(
                    label="Grade Report",
                    lines=10,
                )

        with gr.Row():
            gr.Markdown("### 📋 Level Comparison")
            comparison_output = gr.Textbox(
                label="Performance Across Difficulty Levels",
                lines=8,
            )

        # Event handlers
        episode_inputs = [task_level_dropdown, seed_slider]
        episode_outputs = [output_view, metrics_view, grade_output]

        run_button.click(
            fn=run_demo_episode,
            inputs=episode_inputs,
            outputs=episode_outputs,
        )

        compare_button.click(
            fn=compare_all_levels,
            inputs=[seed_slider],
            outputs=[comparison_output],
        )

        # Auto-run on load
        demo.load(
            fn=run_demo_episode,
            inputs=episode_inputs,
            outputs=episode_outputs,
        )

        gr.Markdown(_ABOUT_MARKDOWN)

    return demo


demo = create_demo()

# Mount the Gradio app onto the FastAPI server. The /reset route is declared
# above, before the mount at "/".
app = gr.mount_gradio_app(app, demo, path="/")


def main():
    """Serve the FastAPI app (with Gradio mounted) via uvicorn on port 7860."""
    uvicorn.run(app, host="0.0.0.0", port=7860)


if __name__ == "__main__":
    main()