import os
import json
from typing import Optional, List
from openai import OpenAI
from openenv.core.env import OpenEnv
from openenv.core.config import EnvConfig
from openenv.core.models import Action
from openenv.core.grader import create_grader
# Hackathon Variables
API_BASE_URL = os.getenv("API_BASE_URL") or "https://api.openai.com/v1"
MODEL_NAME = os.getenv("MODEL_NAME") or "gpt-4o-mini"
HF_TOKEN = os.getenv("HF_TOKEN") or "dummy"
BENCHMARK = "OpenEnv Email Triage"
system_prompt = """You are an Email Triage AI Agent. You must decide the best action to take for an incoming email.
Possible actions:
0 = Ignore
1 = Reply
2 = Forward
3 = Archive (for newsletters/generic non-urgent internal updates)
4 = Delete (for spam)
Important Rules:
- If the email is clearly spam, Delete it (4).
- If the email is urgent/from a boss, NEVER Ignore (0) or Delete (4) or Archive (3).
- If the email asks for a report/update and says "forward", Forward it (2).
- If the email is urgent and asks a question, Reply (1).
- If the email is a routine question from a colleague, Reply (1).
Return your answer strictly in the following JSON format:
{
"action": <int>
}
"""
def log_start(task: str, env: str, model: str) -> None:
print(f"[START] task={task} env={env} model={model}", flush=True)
def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]) -> None:
error_val = error if error else "null"
done_val = str(done).lower()
print(
f"[STEP] step={step} action={action} reward={reward:.2f} done={done_val} error={error_val}",
flush=True,
)
def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
rewards_str = ",".join(f"{r:.2f}" for r in rewards)
print(f"[END] success={str(success).lower()} steps={steps} score={score:.3f} rewards={rewards_str}", flush=True)
def get_agent_action(client: OpenAI, email) -> int:
if not client.api_key or client.api_key == "dummy":
if email.is_spam: return 4
if email.is_urgent: return 1
return 3
human_prompt = f"Sender: {email.sender}\nSubject: {email.subject}\nBody:\n{email.body}"
try:
response = client.chat.completions.create(
model=MODEL_NAME,
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": human_prompt}
],
response_format={"type": "json_object"},
temperature=0.0
)
data = json.loads(response.choices[0].message.content)
return int(data.get("action", 3))
except Exception as e:
return 3
def main():
client = OpenAI(base_url=API_BASE_URL, api_key=HF_TOKEN)
task_configs = {
'easy': {'criteria': [{'name': 'accuracy', 'weight': 0.8}, {'name': 'critical_safety', 'weight': 0.2}]},
'medium': {'criteria': [{'name': 'accuracy', 'weight': 0.7}, {'name': 'critical_safety', 'weight': 0.3}]},
'hard': {'criteria': [{'name': 'accuracy', 'weight': 0.6}, {'name': 'critical_safety', 'weight': 0.4}]}
}
for level in ["easy", "medium", "hard"]:
log_start(task=level, env=BENCHMARK, model=MODEL_NAME)
config = EnvConfig(task_level=level, verbose=False)
env = OpenEnv(config=config)
grader = create_grader(level, task_configs[level])
try:
obs, info = env.reset(seed=42)
grader.reset()
rewards = []
steps_taken = 0
while obs.current_email is not None:
steps_taken += 1
error = None
try:
action_int = get_agent_action(client, obs.current_email)
except Exception as e:
action_int = 3
error = str(e)
obs, reward, terminated, truncated, info = env.step(Action(action_type=action_int))
grader.update(**info)
rewards.append(reward)
done = terminated or truncated
log_step(step=steps_taken, action=str(action_int), reward=reward, done=done, error=error)
if done:
break
report = grader.get_grade_report()
log_end(success=report['passed'], steps=steps_taken, score=report['final_score'], rewards=rewards)
finally:
try:
env.close()
except Exception as e:
print(f"[DEBUG] env.close() error: {e}", flush=True)
if __name__ == "__main__":
main()