"""OpenEnv Email Triage benchmark runner.

Drives an LLM-based (or rule-based fallback) agent through the
"OpenEnv Email Triage" environment at three difficulty levels,
logging per-step actions/rewards and a final grade report.
"""

import json
import os
from typing import List, Optional

from openai import OpenAI

from openenv.core.config import EnvConfig
from openenv.core.env import OpenEnv
from openenv.core.grader import create_grader
from openenv.core.models import Action

# Hackathon variables (overridable via environment; `or` also replaces
# empty-string values, unlike os.getenv's default argument).
API_BASE_URL = os.getenv("API_BASE_URL") or "https://api.openai.com/v1"
MODEL_NAME = os.getenv("MODEL_NAME") or "gpt-4o-mini"
HF_TOKEN = os.getenv("HF_TOKEN") or "dummy"
BENCHMARK = "OpenEnv Email Triage"

# NOTE(fix): the JSON example previously read `{ "action": }`, which is
# malformed JSON; an explicit placeholder gives json_object mode a valid shape.
system_prompt = """You are an Email Triage AI Agent. You must decide the best action to take for an incoming email.
Possible actions:
0 = Ignore
1 = Reply
2 = Forward
3 = Archive (for newsletters/generic non-urgent internal updates)
4 = Delete (for spam)
Important Rules:
- If the email is clearly spam, Delete it (4).
- If the email is urgent/from a boss, NEVER Ignore (0) or Delete (4) or Archive (3).
- If the email asks for a report/update and says "forward", Forward it (2).
- If the email is urgent and asks a question, Reply (1).
- If the email is a routine question from a colleague, Reply (1).
Return your answer strictly in the following JSON format: {"action": <action_number>}
"""


def log_start(task: str, env: str, model: str) -> None:
    """Emit the [START] marker line for one benchmark task."""
    print(f"[START] task={task} env={env} model={model}", flush=True)


def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]) -> None:
    """Emit one [STEP] marker line; `error` is rendered as 'null' when absent."""
    error_val = error if error else "null"
    done_val = str(done).lower()
    print(
        f"[STEP] step={step} action={action} reward={reward:.2f} done={done_val} error={error_val}",
        flush=True,
    )


def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
    """Emit the [END] marker line with the comma-joined reward trace."""
    rewards_str = ",".join(f"{r:.2f}" for r in rewards)
    print(
        f"[END] success={str(success).lower()} steps={steps} score={score:.3f} rewards={rewards_str}",
        flush=True,
    )


# Mirrors the action space declared in system_prompt.
_VALID_ACTIONS = {0, 1, 2, 3, 4}


def get_agent_action(client: OpenAI, email) -> int:
    """Choose a triage action (0-4) for *email*.

    Uses a rule-based offline policy when no real API key is configured,
    and falls back to Archive (3) on any model/parse failure or an
    out-of-range answer from the model.

    `email` is a project type — assumed to expose .sender/.subject/.body
    and .is_spam/.is_urgent, per the attribute accesses below; TODO confirm
    against the openenv observation model.
    """
    if not client.api_key or client.api_key == "dummy":
        # Offline heuristic: Delete spam (4), Reply to urgent (1), Archive rest (3).
        if email.is_spam:
            return 4
        if email.is_urgent:
            return 1
        return 3

    human_prompt = f"Sender: {email.sender}\nSubject: {email.subject}\nBody:\n{email.body}"
    try:
        response = client.chat.completions.create(
            model=MODEL_NAME,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": human_prompt},
            ],
            response_format={"type": "json_object"},
            temperature=0.0,
        )
        data = json.loads(response.choices[0].message.content)
        action = int(data.get("action", 3))
    except Exception:
        # Any API/JSON/parse failure degrades to the safe Archive action.
        return 3
    # Guard against hallucinated values outside the declared action space.
    return action if action in _VALID_ACTIONS else 3


def main() -> None:
    """Run the benchmark across easy/medium/hard levels and log results."""
    client = OpenAI(base_url=API_BASE_URL, api_key=HF_TOKEN)

    # Grading criteria weights shift toward safety as difficulty rises.
    task_configs = {
        "easy": {"criteria": [{"name": "accuracy", "weight": 0.8}, {"name": "critical_safety", "weight": 0.2}]},
        "medium": {"criteria": [{"name": "accuracy", "weight": 0.7}, {"name": "critical_safety", "weight": 0.3}]},
        "hard": {"criteria": [{"name": "accuracy", "weight": 0.6}, {"name": "critical_safety", "weight": 0.4}]},
    }

    for level in ["easy", "medium", "hard"]:
        log_start(task=level, env=BENCHMARK, model=MODEL_NAME)
        config = EnvConfig(task_level=level, verbose=False)
        env = OpenEnv(config=config)
        grader = create_grader(level, task_configs[level])
        try:
            obs, info = env.reset(seed=42)  # fixed seed for reproducible runs
            grader.reset()
            rewards: List[float] = []
            steps_taken = 0
            while obs.current_email is not None:
                steps_taken += 1
                error = None
                try:
                    action_int = get_agent_action(client, obs.current_email)
                except Exception as e:
                    # Never abort the episode on agent failure: Archive and record.
                    action_int = 3
                    error = str(e)
                obs, reward, terminated, truncated, info = env.step(Action(action_type=action_int))
                grader.update(**info)
                rewards.append(reward)
                done = terminated or truncated
                log_step(step=steps_taken, action=str(action_int), reward=reward, done=done, error=error)
                if done:
                    break
            report = grader.get_grade_report()
            log_end(
                success=report["passed"],
                steps=steps_taken,
                score=report["final_score"],
                rewards=rewards,
            )
        finally:
            try:
                env.close()
            except Exception as e:
                # Best-effort cleanup; closing failures must not mask results.
                print(f"[DEBUG] env.close() error: {e}", flush=True)


if __name__ == "__main__":
    main()