Spaces:
Sleeping
Sleeping
| """ | |
| OpenEnv Email Triage - Hugging Face Spaces Demo with FastAPI Router | |
| Interactive web interface for testing the Email Triage environment. | |
| Includes a POST /reset endpoint to satisfy automated validation checks. | |
| """ | |
| import gradio as gr | |
| import json | |
| import time | |
| import numpy as np | |
| from pathlib import Path | |
| from fastapi import FastAPI | |
| import uvicorn | |
| from openenv.core.env import OpenEnv | |
| from openenv.core.config import EnvConfig | |
| from openenv.core.grader import create_grader | |
| from openenv.core.models import Action | |
| # Create FastAPI app for the Hackathon validation pings | |
| app = FastAPI() | |
| # Load configuration | |
| CONFIG_PATH = Path("openenv.yaml") | |
| def load_yaml_config(): | |
| try: | |
| import yaml | |
| with open(CONFIG_PATH, 'r') as f: | |
| return yaml.safe_load(f) | |
| except: | |
| return None | |
| def get_task_config(task_level: str) -> dict: | |
| yaml_config = load_yaml_config() | |
| if yaml_config and 'tasks' in yaml_config: | |
| return yaml_config['tasks'][task_level] | |
| defaults = { | |
| 'easy': {'config': {'num_emails': 10, 'spam_ratio': 0.3, 'urgent_ratio': 0.2, 'confounding_ratio': 0.0}, | |
| 'grader': {'success_threshold': 0.7, 'criteria': [{'name': 'accuracy', 'weight': 0.8}, {'name': 'critical_safety', 'weight': 0.2}]}}, | |
| 'medium': {'config': {'num_emails': 20, 'spam_ratio': 0.3, 'urgent_ratio': 0.2, 'confounding_ratio': 0.2}, | |
| 'grader': {'success_threshold': 0.8, 'criteria': [{'name': 'accuracy', 'weight': 0.7}, {'name': 'critical_safety', 'weight': 0.3}]}}, | |
| 'hard': {'config': {'num_emails': 50, 'spam_ratio': 0.4, 'urgent_ratio': 0.1, 'confounding_ratio': 0.4}, | |
| 'grader': {'success_threshold': 0.9, 'criteria': [{'name': 'accuracy', 'weight': 0.6}, {'name': 'critical_safety', 'weight': 0.4}]}}, | |
| } | |
| return defaults.get(task_level, defaults['medium']) | |
| # Global environment state for the session | |
| env_instance = None | |
| grader_instance = None | |
| def rest_api_reset(): | |
| return {"status": "success"} | |
| def run_demo_episode(task_level: str = "medium", seed: int = 42): | |
| """ | |
| Run single demo episode and return results. | |
| """ | |
| render_mode = "rgb_array" | |
| # Get configuration | |
| task_config = get_task_config(task_level) | |
| # Create environment | |
| env_config = EnvConfig( | |
| **task_config['config'], | |
| task_level=task_level, | |
| render_mode=render_mode, | |
| verbose=False, | |
| ) | |
| try: | |
| env = OpenEnv(config=env_config) | |
| except Exception as e: | |
| import traceback | |
| error_msg = f"Failed to create environment: {str(e)}\n\n{traceback.format_exc()}" | |
| print(error_msg) | |
| # Return placeholder image and error message | |
| placeholder = np.zeros((768, 1024, 3), dtype=np.uint8) | |
| return placeholder, "Error initializing environment", error_msg | |
| # Create grader | |
| grader = create_grader(task_level, task_config['grader']) | |
| # Reset | |
| obs, info = env.reset(seed=seed) | |
| grader.reset() | |
| # Run episode | |
| history = [] | |
| total_reward = 0.0 | |
| steps = 0 | |
| max_steps = 200 # Limit for demo | |
| for step in range(max_steps): | |
| current_idx = env.current_email_index | |
| if current_idx < len(env.emails_queue): | |
| email = env.emails_queue[current_idx] | |
| sender = email.sender | |
| subject = email.subject | |
| else: | |
| break | |
| # Random action for demo (in real use, this would be your agent) | |
| action = env.action_space.sample() | |
| # Take step | |
| obs, reward, terminated, truncated, info = env.step(action) | |
| action_map = {0: "Ignore", 1: "Reply", 2: "Forward", 3: "Archive", 4: "Delete"} | |
| history.append([ | |
| sender, | |
| subject, | |
| action_map.get(action, str(action)), | |
| f"{reward:.1f}", | |
| "Yes" if info.get('last_reward', -1) > 0 else "No" | |
| ]) | |
| # Update grader | |
| grader.update(**info) | |
| total_reward += reward | |
| steps += 1 | |
| # Check termination | |
| if terminated or truncated: | |
| break | |
| # Get grade report | |
| grade_report = grader.get_grade_report() | |
| # Generate metrics text | |
| metrics_text = f""" | |
| **Episode Statistics:** | |
| - Steps: {steps} | |
| - Total Reward: {total_reward:.2f} | |
| - Correct Actions: {info.get('correct_actions', 0)} | |
| - Incorrect Actions: {info.get('incorrect_actions', 0)} | |
| - Critical Failures: {info.get('critical_failures', 0)} | |
| """.strip() | |
| # Generate grade text | |
| grade_text = f""" | |
| **Performance Grade: {grade_report['final_score']:.2f} / 1.00** | |
| {grade_report['feedback']} | |
| **Criteria Scores:** | |
| """ | |
| for criterion_name, score in grade_report['criteria_scores'].items(): | |
| grade_text += f"\n- {criterion_name.replace('_', ' ').title()}: {score:.2f}" | |
| grade_text += f"\n\n**Status:** {'β PASSED' if grade_report['passed'] else 'β FAILED'}" | |
| grade_text += f"\nThreshold: {grade_report['success_threshold']:.2f}" | |
| env.close() | |
| return history, metrics_text, grade_text | |
| def compare_all_levels(seed: int = 42): | |
| """ | |
| Run comparison across all difficulty levels. | |
| Args: | |
| seed: Random seed | |
| Returns: | |
| Comparison table text | |
| """ | |
| results = [] | |
| for level in ['easy', 'medium', 'hard']: | |
| task_config = get_task_config(level) | |
| env_config = EnvConfig( | |
| **task_config['config'], | |
| task_level=level, | |
| verbose=False, | |
| ) | |
| env = OpenEnv(config=env_config) | |
| grader_instance = create_grader(level, task_config['grader']) | |
| obs, _ = env.reset(seed=seed) | |
| grader_instance.reset() | |
| # Run episode | |
| done = False | |
| steps = 0 | |
| info = {} | |
| while not done and steps < 300: | |
| action = env.action_space.sample() | |
| obs, reward, terminated, truncated, info = env.step(action) | |
| grader_instance.update(**info) | |
| done = terminated or truncated | |
| steps += 1 | |
| grade_report = grader_instance.get_grade_report() | |
| results.append({ | |
| 'level': level.upper(), | |
| 'score': grade_report['final_score'], | |
| 'passed': 'β' if grade_report['passed'] else 'β', | |
| 'steps': steps, | |
| }) | |
| env.close() | |
| # Create comparison table | |
| table = "| Difficulty | Score | Status | Steps |\n" | |
| table += "|------------|-------|--------|-------|\n" | |
| for result in results: | |
| table += f"| {result['level']:10s} | {result['score']:.2f} | {result['passed']:6s} | {result['steps']:5d} |\n" | |
| return table | |
| def create_demo(): | |
| with gr.Blocks(title="OpenEnv Email Triage") as demo: | |
| gr.Markdown("# π§ OpenEnv: Email Triage") | |
| gr.Markdown("Real-world task environment for AI agent training. Classify the inbox accurately and maintain safety limits.") | |
| with gr.Row(): | |
| with gr.Column(scale=1): | |
| task_level_dropdown = gr.Dropdown(choices=['easy', 'medium', 'hard'], value='medium', label="Difficulty") | |
| seed_slider = gr.Slider(minimum=0, maximum=1000, value=42, step=1, label="Random Seed") | |
| reset_btn = gr.Button("Initialize Inbox", variant="primary") | |
| run_button = gr.Button("π Run Episode", variant="primary") | |
| compare_button = gr.Button("π Compare All Levels") | |
| with gr.Column(scale=3): | |
| gr.Markdown("### πΊ Environment View") | |
| output_view = gr.Dataframe( | |
| label="Inbox Triage History", | |
| headers=["Sender", "Subject", "Action Taken", "Reward", "Correct?"], | |
| ) | |
| with gr.Row(): | |
| with gr.Column(): | |
| metrics_view = gr.Markdown("### Metrics\nN/A") | |
| with gr.Column(): | |
| gr.Markdown("### π― Performance Grade") | |
| grade_output = gr.Textbox( | |
| label="Grade Report", | |
| lines=10, | |
| ) | |
| with gr.Row(): | |
| gr.Markdown("### π Level Comparison") | |
| comparison_output = gr.Textbox( | |
| label="Performance Across Difficulty Levels", | |
| lines=8, | |
| ) | |
| # Event handlers | |
| run_button.click( | |
| fn=run_demo_episode, | |
| inputs=[task_level_dropdown, seed_slider], | |
| outputs=[output_view, metrics_view, grade_output], | |
| ) | |
| compare_button.click( | |
| fn=compare_all_levels, | |
| inputs=[seed_slider], | |
| outputs=[comparison_output], | |
| ) | |
| # Auto-run on load | |
| demo.load( | |
| fn=run_demo_episode, | |
| inputs=[task_level_dropdown, seed_slider], | |
| outputs=[output_view, metrics_view, grade_output], | |
| ) | |
| gr.Markdown(""" | |
| --- | |
| **About:** This is a production-ready RL environment for training email triage agents. | |
| **Task:** Accurately classify emails. 0=Ignore, 1=Reply, 2=Forward, 3=Archive, 4=Delete. | |
| **Scoring:** Agents are graded on accuracy and critical safety (e.g. not deleting urgent emails). | |
| [View on GitHub](https://github.com/yourusername/OpenEnv) | [Documentation](https://github.com/yourusername/OpenEnv#readme) | |
| """) | |
| return demo | |
| demo = create_demo() | |
| # Mount the Gradio app onto the FastAPI server | |
| app = gr.mount_gradio_app(app, demo, path="/") | |
| def main(): | |
| import uvicorn | |
| # Create and launch demo using uvicorn to serve the FastAPI app (with Gradio mounted) | |
| uvicorn.run(app, host="0.0.0.0", port=7860) | |
| if __name__ == "__main__": | |
| main() | |