Spaces:
Sleeping
Sleeping
| import os | |
| import json | |
| from typing import Optional, List | |
| from openai import OpenAI | |
| from openenv.core.env import OpenEnv | |
| from openenv.core.config import EnvConfig | |
| from openenv.core.models import Action | |
| from openenv.core.grader import create_grader | |
# Hackathon runtime configuration. Each environment-backed setting falls back
# to the literal on the right when the variable is unset or empty.
API_BASE_URL = os.environ.get("API_BASE_URL") or "https://api.openai.com/v1"
MODEL_NAME = os.environ.get("MODEL_NAME") or "gpt-4o-mini"
# Passed to the OpenAI client as its API key in main().
HF_TOKEN = os.environ.get("HF_TOKEN") or "dummy"
# Reported as the ``env=`` field of the [START] log line.
BENCHMARK = "OpenEnv Email Triage"
# System message sent with every chat-completion request. It lists the five
# numeric actions, the triage rules, and the JSON reply shape that
# get_agent_action() parses. Each literal below is one line of the prompt.
system_prompt = (
    "You are an Email Triage AI Agent. You must decide the best action to take for an incoming email.\n"
    "Possible actions:\n"
    "0 = Ignore\n"
    "1 = Reply\n"
    "2 = Forward\n"
    "3 = Archive (for newsletters/generic non-urgent internal updates)\n"
    "4 = Delete (for spam)\n"
    "Important Rules:\n"
    "- If the email is clearly spam, Delete it (4).\n"
    "- If the email is urgent/from a boss, NEVER Ignore (0) or Delete (4) or Archive (3).\n"
    '- If the email asks for a report/update and says "forward", Forward it (2).\n'
    "- If the email is urgent and asks a question, Reply (1).\n"
    "- If the email is a routine question from a colleague, Reply (1).\n"
    "Return your answer strictly in the following JSON format:\n"
    "{\n"
    '"action": <int>\n'
    "}\n"
)
def log_start(task: str, env: str, model: str) -> None:
    """Print the ``[START]`` marker line that opens one task run.

    Args:
        task: Task identifier, emitted as ``task=``.
        env: Environment/benchmark name, emitted as ``env=``.
        model: Model name, emitted as ``model=``.
    """
    fields = (("task", task), ("env", env), ("model", model))
    rendered = " ".join(f"{key}={value}" for key, value in fields)
    print("[START] " + rendered, flush=True)
def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]) -> None:
    """Print one ``[STEP]`` line describing a single environment step.

    Args:
        step: Step counter value, emitted as ``step=``.
        action: Action taken, already rendered as text.
        reward: Step reward, printed with two decimals.
        done: Episode-finished flag, printed lower-cased (``true``/``false``).
        error: Error text for this step; ``None`` or empty prints ``null``.
    """
    pieces = [
        f"step={step}",
        f"action={action}",
        f"reward={reward:.2f}",
        f"done={str(done).lower()}",
        f"error={error or 'null'}",
    ]
    print("[STEP] " + " ".join(pieces), flush=True)
def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
    """Print the ``[END]`` summary line that closes one task run.

    Args:
        success: Pass/fail flag, printed lower-cased.
        steps: Number of steps taken.
        score: Final score, printed with three decimals.
        rewards: Per-step rewards, printed comma-separated with two decimals
            each (an empty list prints an empty ``rewards=`` field).
    """
    joined_rewards = ",".join(format(value, ".2f") for value in rewards)
    outcome = str(success).lower()
    summary = "[END] success={} steps={} score={:.3f} rewards={}".format(
        outcome, steps, score, joined_rewards
    )
    print(summary, flush=True)
def get_agent_action(client: "OpenAI", email) -> int:
    """Choose a triage action code (0-4) for ``email``.

    When the client has no usable API key (empty, or the ``"dummy"``
    placeholder) no request is made and a local heuristic decides:
    spam -> 4 (Delete), urgent -> 1 (Reply), anything else -> 3 (Archive).

    Otherwise the model is asked for a JSON object with an ``"action"`` key.
    Any request/parsing failure, a missing key, or a value outside 0-4 falls
    back to 3; the reason is reported on a ``[DEBUG]`` line instead of being
    dropped silently.

    Args:
        client: OpenAI-compatible client; ``api_key`` and
            ``chat.completions.create`` are used.
        email: Object exposing ``is_spam``, ``is_urgent``, ``sender``,
            ``subject`` and ``body``.

    Returns:
        An integer action code in the range 0-4.
    """
    default_action = 3
    valid_actions = range(5)  # 0..4, the codes listed in system_prompt

    if not client.api_key or client.api_key == "dummy":
        if email.is_spam:
            return 4
        if email.is_urgent:
            return 1
        return default_action

    human_prompt = f"Sender: {email.sender}\nSubject: {email.subject}\nBody:\n{email.body}"
    try:
        response = client.chat.completions.create(
            model=MODEL_NAME,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": human_prompt},
            ],
            response_format={"type": "json_object"},
            temperature=0.0,
        )
        data = json.loads(response.choices[0].message.content)
        action = int(data.get("action", default_action))
    except Exception as exc:
        # Deliberately broad: a failed request or malformed reply must not
        # abort the episode, but the cause should still be visible.
        print(f"[DEBUG] get_agent_action fallback to {default_action}: {exc}", flush=True)
        return default_action

    if action not in valid_actions:
        # The model can return any integer; only 0-4 are defined actions.
        print(
            f"[DEBUG] get_agent_action got out-of-range action {action}; using {default_action}",
            flush=True,
        )
        return default_action
    return action
def main():
    """Run the agent on the easy, medium and hard tasks and log each episode.

    For every level this prints one ``[START]`` line, one ``[STEP]`` line per
    environment step, and one ``[END]`` line. The ``[END]`` line and
    ``env.close()`` run from ``finally`` blocks, so they happen even when the
    episode raises; in that case ``[END]`` reports ``success=false`` and
    ``score=0.000`` with whatever rewards were collected, and the exception
    then propagates to the caller.
    """
    client = OpenAI(base_url=API_BASE_URL, api_key=HF_TOKEN)

    # Grading criteria handed to create_grader() for each task level.
    task_configs = {
        'easy': {'criteria': [{'name': 'accuracy', 'weight': 0.8}, {'name': 'critical_safety', 'weight': 0.2}]},
        'medium': {'criteria': [{'name': 'accuracy', 'weight': 0.7}, {'name': 'critical_safety', 'weight': 0.3}]},
        'hard': {'criteria': [{'name': 'accuracy', 'weight': 0.6}, {'name': 'critical_safety', 'weight': 0.4}]},
    }

    # Dict insertion order gives easy -> medium -> hard.
    for level, grader_config in task_configs.items():
        log_start(task=level, env=BENCHMARK, model=MODEL_NAME)

        # Defaults reported by [END] if the episode fails before grading.
        rewards = []
        steps_taken = 0
        success = False
        score = 0.0
        env = None
        try:
            env = OpenEnv(config=EnvConfig(task_level=level, verbose=False))
            grader = create_grader(level, grader_config)
            obs, _ = env.reset(seed=42)
            grader.reset()

            while obs.current_email is not None:
                steps_taken += 1
                error = None
                try:
                    action_int = get_agent_action(client, obs.current_email)
                except Exception as e:
                    # Fall back to Archive (3) and surface the error in the step log.
                    action_int = 3
                    error = str(e)

                obs, reward, terminated, truncated, info = env.step(Action(action_type=action_int))
                grader.update(**info)
                rewards.append(reward)
                done = terminated or truncated
                log_step(step=steps_taken, action=str(action_int), reward=reward, done=done, error=error)
                if done:
                    break

            report = grader.get_grade_report()
            success = report['passed']
            score = report['final_score']
        finally:
            try:
                # Always pair the [START] line with an [END] line.
                log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
            finally:
                if env is not None:
                    try:
                        env.close()
                    except Exception as e:
                        print(f"[DEBUG] env.close() error: {e}", flush=True)
# Run the benchmark only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()