import os
import json

from openai import OpenAI
from openenv.core.env import OpenEnv
from openenv.core.config import EnvConfig
from openenv.core.models import Action
from openenv.core.grader import create_grader

system_prompt = """You are an Email Triage AI Agent. You must decide the best action to take for an incoming email.
Possible actions:
0 = Ignore
1 = Reply
2 = Forward
3 = Archive (for newsletters/generic non-urgent internal updates)
4 = Delete (for spam)
Important Rules:
- If the email is clearly spam, Delete it (4).
- If the email is urgent/from a boss, NEVER Ignore (0) or Delete (4) or Archive (3).
- If the email asks for a report/update and says "forward", Forward it (2).
- If the email is urgent and asks a question, Reply (1).
- If the email is a routine question from a colleague, Reply (1).
Return your answer strictly in the following JSON format:
{
"action": <int>
}
"""


def get_agent_action(client: OpenAI, email) -> int:
    """Return a triage action (0-4) for one email, asking the model if available."""
    if client is None:
        # No API key available: fall back to a simple rule-based mock.
        if email.is_spam:
            return 4  # Delete spam
        if email.is_urgent:
            return 1  # Reply to urgent mail
        return 3  # Archive everything else
    human_prompt = f"Sender: {email.sender}\nSubject: {email.subject}\nBody:\n{email.body}"
    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": human_prompt},
            ],
            response_format={"type": "json_object"},
            temperature=0.0,
        )
        data = json.loads(response.choices[0].message.content)
        # Cast defensively in case the model returns the action as a string.
        return int(data.get("action", 3))
    except Exception as e:
        print(f"OpenAI error: {e}")
        return 3  # Default to Archive (3) on any API or parsing failure.
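
# Usage sketch (hypothetical standalone call; `email` is any object exposing
# the attributes accessed above, e.g. obs.current_email from the env loop):
#   action = get_agent_action(client, email)  # -> int in {0, 1, 2, 3, 4}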


def main():
    api_key = os.environ.get("OPENAI_API_KEY", "")
    client = OpenAI(api_key=api_key) if api_key else None
    if not client:
        print("Warning: OPENAI_API_KEY not set. Using mocked basic agent.")

    print("Running Baseline Inference on all Tasks...")
    # Per-level grading criteria (inline stand-ins for the YAML task configs).
    task_configs = {
        "easy": {"criteria": [{"name": "accuracy", "weight": 0.8}, {"name": "critical_safety", "weight": 0.2}]},
        "medium": {"criteria": [{"name": "accuracy", "weight": 0.7}, {"name": "critical_safety", "weight": 0.3}]},
        "hard": {"criteria": [{"name": "accuracy", "weight": 0.6}, {"name": "critical_safety", "weight": 0.4}]},
    }
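    # Assumed scoring model (a sketch; create_grader's internals aren't shown):
    # the final score is the weighted sum of the criteria scores, e.g. for
    # "easy": final_score = 0.8 * accuracy + 0.2 * critical_safety.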

    for level in ["easy", "medium", "hard"]:
        print(f"\n--- Testing Level: {level.upper()} ---")
        config = EnvConfig(task_level=level, verbose=False)
        env = OpenEnv(config=config)
        grader = create_grader(level, task_configs[level])

        obs, info = env.reset(seed=42)
        grader.reset()
        # Step through emails until the inbox is empty or the episode terminates.
        while obs.current_email is not None:
            action_int = get_agent_action(client, obs.current_email)
            obs, reward, terminated, truncated, info = env.step(Action(action_type=action_int))
            grader.update(**info)
            if terminated:
                break

        report = grader.get_grade_report()
        print(f"Final Score: {report['final_score']:.2f} / 1.0 (Passed? {report['passed']})")
        print(f"Accuracy: {report['criteria_scores'].get('accuracy', 0):.2f}")
        print(f"Safety: {report['criteria_scores'].get('critical_safety', 0):.2f}")
        print(f"Metrics: Incorrect={report['episode_data']['incorrect_actions']}, Critical Failures={report['episode_data']['critical_failures']}")
if __name__ == "__main__":
main()