Spaces:
Running
Running
Deploy Project Epsilon Space bundle
Browse files- README.md +4 -0
- docs/HF_SPACE_README.md +4 -0
- inference.py +72 -0
- run.py +3 -3
- src/executive_assistant/config.py +14 -4
- src/executive_assistant/deployment.py +4 -0
- src/executive_assistant/env.py +24 -2
- src/executive_assistant/runner.py +2 -2
- src/executive_assistant/training.py +1 -1
- tests/test_agent.py +7 -7
- tests/test_env.py +14 -3
- tests/test_inference.py +30 -0
README.md
CHANGED
|
@@ -6,6 +6,10 @@ colorTo: gray
|
|
| 6 |
sdk: docker
|
| 7 |
app_port: 7860
|
| 8 |
pinned: false
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
short_description: OpenEnv executive assistant sandbox demo for judges.
|
| 10 |
---
|
| 11 |
|
|
|
|
| 6 |
sdk: docker
|
| 7 |
app_port: 7860
|
| 8 |
pinned: false
|
| 9 |
+
tags:
|
| 10 |
+
- openenv
|
| 11 |
+
- docker
|
| 12 |
+
- gradio
|
| 13 |
short_description: OpenEnv executive assistant sandbox demo for judges.
|
| 14 |
---
|
| 15 |
|
docs/HF_SPACE_README.md
CHANGED
|
@@ -6,6 +6,10 @@ colorTo: gray
|
|
| 6 |
sdk: docker
|
| 7 |
app_port: 7860
|
| 8 |
pinned: false
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
short_description: OpenEnv executive assistant sandbox demo for judges.
|
| 10 |
---
|
| 11 |
|
|
|
|
| 6 |
sdk: docker
|
| 7 |
app_port: 7860
|
| 8 |
pinned: false
|
| 9 |
+
tags:
|
| 10 |
+
- openenv
|
| 11 |
+
- docker
|
| 12 |
+
- gradio
|
| 13 |
short_description: OpenEnv executive assistant sandbox demo for judges.
|
| 14 |
---
|
| 15 |
|
inference.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import argparse
|
| 4 |
+
import json
|
| 5 |
+
import os
|
| 6 |
+
|
| 7 |
+
from src.executive_assistant.agent import OpenRouterPolicy
|
| 8 |
+
from src.executive_assistant.config import OpenRouterConfig
|
| 9 |
+
from src.executive_assistant.runner import run_policy_suite
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
TASKS = [
|
| 13 |
+
"easy_deadline_extraction",
|
| 14 |
+
"medium_triage_and_negotiation",
|
| 15 |
+
"hard_rag_reply",
|
| 16 |
+
]
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def build_openai_compatible_policy() -> OpenRouterPolicy:
|
| 20 |
+
api_key = os.environ.get("OPENAI_API_KEY", "").strip()
|
| 21 |
+
base_url = os.environ.get("API_BASE_URL", "").strip()
|
| 22 |
+
model_name = os.environ.get("MODEL_NAME", "").strip()
|
| 23 |
+
if not api_key:
|
| 24 |
+
raise RuntimeError("OPENAI_API_KEY is required.")
|
| 25 |
+
if not base_url:
|
| 26 |
+
raise RuntimeError("API_BASE_URL is required.")
|
| 27 |
+
if not model_name:
|
| 28 |
+
raise RuntimeError("MODEL_NAME is required.")
|
| 29 |
+
config = OpenRouterConfig(
|
| 30 |
+
api_key=api_key,
|
| 31 |
+
base_url=base_url,
|
| 32 |
+
model_name=model_name,
|
| 33 |
+
site_url=os.environ.get("OPENROUTER_SITE_URL", "http://localhost:7860"),
|
| 34 |
+
app_name=os.environ.get(
|
| 35 |
+
"OPENROUTER_APP_NAME",
|
| 36 |
+
"EmailMaestro | Executive Assistant Sandbox",
|
| 37 |
+
),
|
| 38 |
+
temperature=float(os.environ.get("OPENROUTER_TEMPERATURE", "0.0")),
|
| 39 |
+
max_tokens=int(os.environ.get("OPENROUTER_MAX_TOKENS", "600")),
|
| 40 |
+
)
|
| 41 |
+
return OpenRouterPolicy(config=config)
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def summarize_traces(traces) -> dict[str, dict[str, object]]:
|
| 45 |
+
return {
|
| 46 |
+
task_name: {
|
| 47 |
+
"completed": trace.completed,
|
| 48 |
+
"final_score": trace.final_score,
|
| 49 |
+
"steps": len(trace.steps),
|
| 50 |
+
"termination_reason": trace.termination_reason,
|
| 51 |
+
}
|
| 52 |
+
for task_name, trace in traces.items()
|
| 53 |
+
}
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def main() -> None:
|
| 57 |
+
parser = argparse.ArgumentParser(
|
| 58 |
+
description="Run the required OpenAI-client inference baseline against all seeded tasks."
|
| 59 |
+
)
|
| 60 |
+
parser.add_argument("--max-steps", type=int, default=12)
|
| 61 |
+
args = parser.parse_args()
|
| 62 |
+
|
| 63 |
+
traces = run_policy_suite(
|
| 64 |
+
policy=build_openai_compatible_policy(),
|
| 65 |
+
task_names=TASKS,
|
| 66 |
+
max_steps=args.max_steps,
|
| 67 |
+
)
|
| 68 |
+
print(json.dumps(summarize_traces(traces), indent=2))
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
if __name__ == "__main__":
|
| 72 |
+
main()
|
run.py
CHANGED
|
@@ -20,10 +20,10 @@ for step in range(10):
|
|
| 20 |
print("Reasoning:", decision.reasoning)
|
| 21 |
print("Action:", decision.action)
|
| 22 |
|
| 23 |
-
obs, reward = env.step(decision.action)
|
| 24 |
|
| 25 |
print("Reward:", reward)
|
| 26 |
|
| 27 |
-
if
|
| 28 |
print("\nTASK COMPLETE ✅")
|
| 29 |
-
break
|
|
|
|
| 20 |
print("Reasoning:", decision.reasoning)
|
| 21 |
print("Action:", decision.action)
|
| 22 |
|
| 23 |
+
obs, reward, done, _ = env.step(decision.action)
|
| 24 |
|
| 25 |
print("Reward:", reward)
|
| 26 |
|
| 27 |
+
if done:
|
| 28 |
print("\nTASK COMPLETE ✅")
|
| 29 |
+
break
|
src/executive_assistant/config.py
CHANGED
|
@@ -36,13 +36,23 @@ class OpenRouterConfig:
|
|
| 36 |
def from_env(cls, env_file: str | Path | None = None) -> "OpenRouterConfig":
|
| 37 |
if env_file is not None:
|
| 38 |
load_env_file(env_file)
|
| 39 |
-
api_key = os.environ.get("OPENROUTER_API_KEY", "").strip()
|
|
|
|
|
|
|
| 40 |
if not api_key:
|
| 41 |
-
raise RuntimeError(
|
|
|
|
|
|
|
| 42 |
return cls(
|
| 43 |
api_key=api_key,
|
| 44 |
-
model_name=os.environ.get(
|
| 45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
site_url=os.environ.get("OPENROUTER_SITE_URL", "http://localhost:7860"),
|
| 47 |
app_name=os.environ.get(
|
| 48 |
"OPENROUTER_APP_NAME",
|
|
|
|
| 36 |
def from_env(cls, env_file: str | Path | None = None) -> "OpenRouterConfig":
|
| 37 |
if env_file is not None:
|
| 38 |
load_env_file(env_file)
|
| 39 |
+
api_key = os.environ.get("OPENROUTER_API_KEY", "").strip() or os.environ.get(
|
| 40 |
+
"OPENAI_API_KEY", ""
|
| 41 |
+
).strip()
|
| 42 |
if not api_key:
|
| 43 |
+
raise RuntimeError(
|
| 44 |
+
"OPENROUTER_API_KEY or OPENAI_API_KEY is required for model access."
|
| 45 |
+
)
|
| 46 |
return cls(
|
| 47 |
api_key=api_key,
|
| 48 |
+
model_name=os.environ.get(
|
| 49 |
+
"OPENROUTER_MODEL",
|
| 50 |
+
os.environ.get("MODEL_NAME", "google/gemma-4-31b-it"),
|
| 51 |
+
),
|
| 52 |
+
base_url=os.environ.get(
|
| 53 |
+
"OPENROUTER_BASE_URL",
|
| 54 |
+
os.environ.get("API_BASE_URL", "https://openrouter.ai/api/v1"),
|
| 55 |
+
),
|
| 56 |
site_url=os.environ.get("OPENROUTER_SITE_URL", "http://localhost:7860"),
|
| 57 |
app_name=os.environ.get(
|
| 58 |
"OPENROUTER_APP_NAME",
|
src/executive_assistant/deployment.py
CHANGED
|
@@ -98,6 +98,10 @@ colorTo: gray
|
|
| 98 |
sdk: docker
|
| 99 |
app_port: {config.app_port}
|
| 100 |
pinned: false
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
short_description: OpenEnv executive assistant sandbox demo for judges.
|
| 102 |
---
|
| 103 |
|
|
|
|
| 98 |
sdk: docker
|
| 99 |
app_port: {config.app_port}
|
| 100 |
pinned: false
|
| 101 |
+
tags:
|
| 102 |
+
- openenv
|
| 103 |
+
- docker
|
| 104 |
+
- gradio
|
| 105 |
short_description: OpenEnv executive assistant sandbox demo for judges.
|
| 106 |
---
|
| 107 |
|
src/executive_assistant/env.py
CHANGED
|
@@ -33,6 +33,18 @@ class ExecutiveAssistantEnv:
|
|
| 33 |
self.step_count = 0
|
| 34 |
return self.observe()
|
| 35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
def observe(self) -> WorkspaceObservation:
|
| 37 |
unread = [
|
| 38 |
EmailSummary(
|
|
@@ -58,7 +70,7 @@ class ExecutiveAssistantEnv:
|
|
| 58 |
action_history=recent_actions,
|
| 59 |
)
|
| 60 |
|
| 61 |
-
def step(self, action: AssistantAction) -> tuple[WorkspaceObservation, TaskReward]:
|
| 62 |
self.step_count += 1
|
| 63 |
if action.action_type == "read_email" and action.target_id is not None:
|
| 64 |
row = self.workspace.read_email(action.target_id)
|
|
@@ -111,7 +123,17 @@ class ExecutiveAssistantEnv:
|
|
| 111 |
is_done=True,
|
| 112 |
reasoning=f"{reward.reasoning}; terminated at step budget",
|
| 113 |
)
|
| 114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
|
| 116 |
def grade(self) -> TaskReward:
|
| 117 |
if self.task_name == "easy_deadline_extraction":
|
|
|
|
| 33 |
self.step_count = 0
|
| 34 |
return self.observe()
|
| 35 |
|
| 36 |
+
def state(self) -> dict[str, object]:
|
| 37 |
+
return {
|
| 38 |
+
"task_name": self.task_name,
|
| 39 |
+
"step_count": self.step_count,
|
| 40 |
+
"max_steps": self.max_steps,
|
| 41 |
+
"last_action_status": self.last_action_status,
|
| 42 |
+
"current_email": self.current_email.model_dump() if self.current_email else None,
|
| 43 |
+
"search_results": [result.model_dump() for result in self.search_results],
|
| 44 |
+
"observation": self.observe().model_dump(),
|
| 45 |
+
"workspace": self.workspace.snapshot(),
|
| 46 |
+
}
|
| 47 |
+
|
| 48 |
def observe(self) -> WorkspaceObservation:
|
| 49 |
unread = [
|
| 50 |
EmailSummary(
|
|
|
|
| 70 |
action_history=recent_actions,
|
| 71 |
)
|
| 72 |
|
| 73 |
+
def step(self, action: AssistantAction) -> tuple[WorkspaceObservation, TaskReward, bool, dict[str, object]]:
|
| 74 |
self.step_count += 1
|
| 75 |
if action.action_type == "read_email" and action.target_id is not None:
|
| 76 |
row = self.workspace.read_email(action.target_id)
|
|
|
|
| 123 |
is_done=True,
|
| 124 |
reasoning=f"{reward.reasoning}; terminated at step budget",
|
| 125 |
)
|
| 126 |
+
done = reward.is_done
|
| 127 |
+
info = {
|
| 128 |
+
"task_name": self.task_name,
|
| 129 |
+
"step_count": self.step_count,
|
| 130 |
+
"max_steps": self.max_steps,
|
| 131 |
+
"status": self.last_action_status,
|
| 132 |
+
"reasoning": reward.reasoning,
|
| 133 |
+
"total_score": reward.total_score,
|
| 134 |
+
"state": self.state(),
|
| 135 |
+
}
|
| 136 |
+
return observation, reward, done, info
|
| 137 |
|
| 138 |
def grade(self) -> TaskReward:
|
| 139 |
if self.task_name == "easy_deadline_extraction":
|
src/executive_assistant/runner.py
CHANGED
|
@@ -71,13 +71,13 @@ class EpisodeRunner:
|
|
| 71 |
4. Update state and capture the resulting trace record
|
| 72 |
"""
|
| 73 |
decision = self.policy.choose_action(task_name, observation)
|
| 74 |
-
next_observation, reward = env.step(decision.action)
|
| 75 |
record = EpisodeStepRecord(
|
| 76 |
step_index=env.step_count,
|
| 77 |
reasoning=decision.reasoning,
|
| 78 |
action=decision.action.model_dump(),
|
| 79 |
observation=next_observation.model_dump(),
|
| 80 |
-
snapshot=
|
| 81 |
reward=reward.model_dump(),
|
| 82 |
status=next_observation.last_action_status,
|
| 83 |
)
|
|
|
|
| 71 |
4. Update state and capture the resulting trace record
|
| 72 |
"""
|
| 73 |
decision = self.policy.choose_action(task_name, observation)
|
| 74 |
+
next_observation, reward, done, info = env.step(decision.action)
|
| 75 |
record = EpisodeStepRecord(
|
| 76 |
step_index=env.step_count,
|
| 77 |
reasoning=decision.reasoning,
|
| 78 |
action=decision.action.model_dump(),
|
| 79 |
observation=next_observation.model_dump(),
|
| 80 |
+
snapshot=info["state"]["workspace"],
|
| 81 |
reward=reward.model_dump(),
|
| 82 |
status=next_observation.last_action_status,
|
| 83 |
)
|
src/executive_assistant/training.py
CHANGED
|
@@ -302,7 +302,7 @@ def train_q_learning(
|
|
| 302 |
state = encode_observation(task_name, observation)
|
| 303 |
decision = learner.choose_action(task_name, observation)
|
| 304 |
action_name = action_name_from_decision(decision, observation)
|
| 305 |
-
next_observation, reward = env.step(decision.action)
|
| 306 |
next_state = encode_observation(task_name, next_observation)
|
| 307 |
reward_delta = reward.total_score - previous_total_score - 0.01
|
| 308 |
previous_total_score = reward.total_score
|
|
|
|
| 302 |
state = encode_observation(task_name, observation)
|
| 303 |
decision = learner.choose_action(task_name, observation)
|
| 304 |
action_name = action_name_from_decision(decision, observation)
|
| 305 |
+
next_observation, reward, _, _ = env.step(decision.action)
|
| 306 |
next_state = encode_observation(task_name, next_observation)
|
| 307 |
reward_delta = reward.total_score - previous_total_score - 0.01
|
| 308 |
previous_total_score = reward.total_score
|
tests/test_agent.py
CHANGED
|
@@ -73,8 +73,8 @@ def test_openrouter_policy_sanitizes_hard_reply_payload() -> None:
|
|
| 73 |
)
|
| 74 |
env = ExecutiveAssistantEnv(task_name="hard_rag_reply")
|
| 75 |
observation = env.reset()
|
| 76 |
-
observation, _ = env.step(AssistantAction(action_type="read_email", target_id=1))
|
| 77 |
-
observation, _ = env.step(AssistantAction(action_type="search_files", payload="Q3 Architecture"))
|
| 78 |
decision = policy.choose_action("hard_rag_reply", observation)
|
| 79 |
assert decision.action.payload is not None
|
| 80 |
assert decision.action.payload.lower().startswith("hello")
|
|
@@ -123,7 +123,7 @@ def test_openrouter_policy_normalizes_easy_todo_payload() -> None:
|
|
| 123 |
)
|
| 124 |
env = ExecutiveAssistantEnv(task_name="easy_deadline_extraction")
|
| 125 |
observation = env.reset()
|
| 126 |
-
observation, _ = env.step(AssistantAction(action_type="read_email", target_id=1))
|
| 127 |
decision = policy.choose_action("easy_deadline_extraction", observation)
|
| 128 |
assert decision.action.payload == "Proposal Due"
|
| 129 |
assert decision.action.secondary_payload == "2026-04-10"
|
|
@@ -148,10 +148,10 @@ def test_openrouter_policy_repairs_medium_forward_fields() -> None:
|
|
| 148 |
)
|
| 149 |
env = ExecutiveAssistantEnv(task_name="medium_triage_and_negotiation")
|
| 150 |
observation = env.reset()
|
| 151 |
-
observation, _ = env.step(AssistantAction(action_type="archive", target_id=1))
|
| 152 |
-
observation, _ = env.step(AssistantAction(action_type="archive", target_id=2))
|
| 153 |
-
observation, _ = env.step(AssistantAction(action_type="archive", target_id=3))
|
| 154 |
-
observation, _ = env.step(AssistantAction(action_type="read_email", target_id=4))
|
| 155 |
decision = policy.choose_action("medium_triage_and_negotiation", observation)
|
| 156 |
assert decision.action.target_id == 4
|
| 157 |
assert decision.action.secondary_payload == "manager@company.com"
|
|
|
|
| 73 |
)
|
| 74 |
env = ExecutiveAssistantEnv(task_name="hard_rag_reply")
|
| 75 |
observation = env.reset()
|
| 76 |
+
observation, _, _, _ = env.step(AssistantAction(action_type="read_email", target_id=1))
|
| 77 |
+
observation, _, _, _ = env.step(AssistantAction(action_type="search_files", payload="Q3 Architecture"))
|
| 78 |
decision = policy.choose_action("hard_rag_reply", observation)
|
| 79 |
assert decision.action.payload is not None
|
| 80 |
assert decision.action.payload.lower().startswith("hello")
|
|
|
|
| 123 |
)
|
| 124 |
env = ExecutiveAssistantEnv(task_name="easy_deadline_extraction")
|
| 125 |
observation = env.reset()
|
| 126 |
+
observation, _, _, _ = env.step(AssistantAction(action_type="read_email", target_id=1))
|
| 127 |
decision = policy.choose_action("easy_deadline_extraction", observation)
|
| 128 |
assert decision.action.payload == "Proposal Due"
|
| 129 |
assert decision.action.secondary_payload == "2026-04-10"
|
|
|
|
| 148 |
)
|
| 149 |
env = ExecutiveAssistantEnv(task_name="medium_triage_and_negotiation")
|
| 150 |
observation = env.reset()
|
| 151 |
+
observation, _, _, _ = env.step(AssistantAction(action_type="archive", target_id=1))
|
| 152 |
+
observation, _, _, _ = env.step(AssistantAction(action_type="archive", target_id=2))
|
| 153 |
+
observation, _, _, _ = env.step(AssistantAction(action_type="archive", target_id=3))
|
| 154 |
+
observation, _, _, _ = env.step(AssistantAction(action_type="read_email", target_id=4))
|
| 155 |
decision = policy.choose_action("medium_triage_and_negotiation", observation)
|
| 156 |
assert decision.action.target_id == 4
|
| 157 |
assert decision.action.secondary_payload == "manager@company.com"
|
tests/test_env.py
CHANGED
|
@@ -11,7 +11,7 @@ def test_easy_env_reset_exposes_seeded_email() -> None:
|
|
| 11 |
def test_easy_env_can_add_todo() -> None:
|
| 12 |
env = ExecutiveAssistantEnv(task_name="easy_deadline_extraction")
|
| 13 |
env.reset()
|
| 14 |
-
observation, reward = env.step(
|
| 15 |
AssistantAction(
|
| 16 |
action_type="add_todo",
|
| 17 |
payload="Proposal due",
|
|
@@ -20,12 +20,14 @@ def test_easy_env_can_add_todo() -> None:
|
|
| 20 |
)
|
| 21 |
assert "Proposal due" in observation.active_todos
|
| 22 |
assert reward.total_score >= 0.0
|
|
|
|
|
|
|
| 23 |
|
| 24 |
|
| 25 |
def test_read_email_populates_current_email() -> None:
|
| 26 |
env = ExecutiveAssistantEnv(task_name="easy_deadline_extraction")
|
| 27 |
observation = env.reset()
|
| 28 |
-
observation, _ = env.step(
|
| 29 |
AssistantAction(action_type="read_email", target_id=observation.unread_emails[0].id)
|
| 30 |
)
|
| 31 |
assert observation.current_email is not None
|
|
@@ -35,6 +37,15 @@ def test_read_email_populates_current_email() -> None:
|
|
| 35 |
def test_search_files_populates_results() -> None:
|
| 36 |
env = ExecutiveAssistantEnv(task_name="hard_rag_reply")
|
| 37 |
env.reset()
|
| 38 |
-
observation, _ = env.step(AssistantAction(action_type="search_files", payload="Q3 Architecture"))
|
| 39 |
assert observation.search_results
|
| 40 |
assert observation.search_results[0].filename == "Q3_Architecture_Report.txt"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
def test_easy_env_can_add_todo() -> None:
|
| 12 |
env = ExecutiveAssistantEnv(task_name="easy_deadline_extraction")
|
| 13 |
env.reset()
|
| 14 |
+
observation, reward, done, info = env.step(
|
| 15 |
AssistantAction(
|
| 16 |
action_type="add_todo",
|
| 17 |
payload="Proposal due",
|
|
|
|
| 20 |
)
|
| 21 |
assert "Proposal due" in observation.active_todos
|
| 22 |
assert reward.total_score >= 0.0
|
| 23 |
+
assert done is False
|
| 24 |
+
assert info["task_name"] == "easy_deadline_extraction"
|
| 25 |
|
| 26 |
|
| 27 |
def test_read_email_populates_current_email() -> None:
|
| 28 |
env = ExecutiveAssistantEnv(task_name="easy_deadline_extraction")
|
| 29 |
observation = env.reset()
|
| 30 |
+
observation, _, _, _ = env.step(
|
| 31 |
AssistantAction(action_type="read_email", target_id=observation.unread_emails[0].id)
|
| 32 |
)
|
| 33 |
assert observation.current_email is not None
|
|
|
|
| 37 |
def test_search_files_populates_results() -> None:
|
| 38 |
env = ExecutiveAssistantEnv(task_name="hard_rag_reply")
|
| 39 |
env.reset()
|
| 40 |
+
observation, _, _, _ = env.step(AssistantAction(action_type="search_files", payload="Q3 Architecture"))
|
| 41 |
assert observation.search_results
|
| 42 |
assert observation.search_results[0].filename == "Q3_Architecture_Report.txt"
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def test_state_returns_workspace_snapshot() -> None:
|
| 46 |
+
env = ExecutiveAssistantEnv(task_name="medium_triage_and_negotiation")
|
| 47 |
+
env.reset()
|
| 48 |
+
state = env.state()
|
| 49 |
+
assert state["task_name"] == "medium_triage_and_negotiation"
|
| 50 |
+
assert "workspace" in state
|
| 51 |
+
assert "emails" in state["workspace"]
|
tests/test_inference.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
|
| 3 |
+
from inference import build_openai_compatible_policy
|
| 4 |
+
from src.executive_assistant.config import OpenRouterConfig
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def test_openrouter_config_accepts_hackathon_env_names(monkeypatch) -> None:
|
| 8 |
+
monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
|
| 9 |
+
monkeypatch.delenv("OPENROUTER_BASE_URL", raising=False)
|
| 10 |
+
monkeypatch.delenv("OPENROUTER_MODEL", raising=False)
|
| 11 |
+
monkeypatch.setenv("OPENAI_API_KEY", "test-key")
|
| 12 |
+
monkeypatch.setenv("API_BASE_URL", "https://openrouter.ai/api/v1")
|
| 13 |
+
monkeypatch.setenv("MODEL_NAME", "google/gemma-4-31b-it")
|
| 14 |
+
config = OpenRouterConfig.from_env()
|
| 15 |
+
assert config.api_key == "test-key"
|
| 16 |
+
assert config.base_url == "https://openrouter.ai/api/v1"
|
| 17 |
+
assert config.model_name == "google/gemma-4-31b-it"
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def test_inference_builds_openai_compatible_policy(monkeypatch) -> None:
|
| 21 |
+
monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
|
| 22 |
+
monkeypatch.delenv("OPENROUTER_BASE_URL", raising=False)
|
| 23 |
+
monkeypatch.delenv("OPENROUTER_MODEL", raising=False)
|
| 24 |
+
monkeypatch.setenv("OPENAI_API_KEY", "test-key")
|
| 25 |
+
monkeypatch.setenv("API_BASE_URL", "https://openrouter.ai/api/v1")
|
| 26 |
+
monkeypatch.setenv("MODEL_NAME", "google/gemma-4-31b-it")
|
| 27 |
+
policy = build_openai_compatible_policy()
|
| 28 |
+
assert policy.config.api_key == "test-key"
|
| 29 |
+
assert policy.config.base_url == "https://openrouter.ai/api/v1"
|
| 30 |
+
assert policy.config.model_name == "google/gemma-4-31b-it"
|