Flickinshots committed on
Commit
200a73b
·
verified ·
1 Parent(s): 696d083

Deploy Project Epsilon Space bundle

Browse files
README.md CHANGED
@@ -6,6 +6,10 @@ colorTo: gray
6
  sdk: docker
7
  app_port: 7860
8
  pinned: false
 
 
 
 
9
  short_description: OpenEnv executive assistant sandbox demo for judges.
10
  ---
11
 
 
6
  sdk: docker
7
  app_port: 7860
8
  pinned: false
9
+ tags:
10
+ - openenv
11
+ - docker
12
+ - gradio
13
  short_description: OpenEnv executive assistant sandbox demo for judges.
14
  ---
15
 
docs/HF_SPACE_README.md CHANGED
@@ -6,6 +6,10 @@ colorTo: gray
6
  sdk: docker
7
  app_port: 7860
8
  pinned: false
 
 
 
 
9
  short_description: OpenEnv executive assistant sandbox demo for judges.
10
  ---
11
 
 
6
  sdk: docker
7
  app_port: 7860
8
  pinned: false
9
+ tags:
10
+ - openenv
11
+ - docker
12
+ - gradio
13
  short_description: OpenEnv executive assistant sandbox demo for judges.
14
  ---
15
 
inference.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+ import os
6
+
7
+ from src.executive_assistant.agent import OpenRouterPolicy
8
+ from src.executive_assistant.config import OpenRouterConfig
9
+ from src.executive_assistant.runner import run_policy_suite
10
+
11
+
12
# Seeded benchmark tasks, ordered easiest to hardest; these names are passed
# verbatim to run_policy_suite in main().
TASKS = [
    "easy_deadline_extraction",
    "medium_triage_and_negotiation",
    "hard_rag_reply",
]
17
+
18
+
19
def build_openai_compatible_policy() -> OpenRouterPolicy:
    """Build the OpenAI-compatible policy from environment variables.

    Required variables: ``OPENAI_API_KEY``, ``API_BASE_URL``, ``MODEL_NAME``.
    Optional overrides: ``OPENROUTER_SITE_URL``, ``OPENROUTER_APP_NAME``,
    ``OPENROUTER_TEMPERATURE``, ``OPENROUTER_MAX_TOKENS``.

    Returns:
        An OpenRouterPolicy wired with the resulting OpenRouterConfig.

    Raises:
        RuntimeError: if any required variable is missing or blank. All
            missing names are reported at once (the original failed serially,
            surfacing only the first missing variable per run).
    """
    required = {
        name: os.environ.get(name, "").strip()
        for name in ("OPENAI_API_KEY", "API_BASE_URL", "MODEL_NAME")
    }
    missing = [name for name, value in required.items() if not value]
    if missing:
        raise RuntimeError(
            f"Missing required environment variables: {', '.join(missing)}."
        )
    config = OpenRouterConfig(
        api_key=required["OPENAI_API_KEY"],
        base_url=required["API_BASE_URL"],
        model_name=required["MODEL_NAME"],
        site_url=os.environ.get("OPENROUTER_SITE_URL", "http://localhost:7860"),
        app_name=os.environ.get(
            "OPENROUTER_APP_NAME",
            "EmailMaestro | Executive Assistant Sandbox",
        ),
        # Defaults mirror the original script: deterministic sampling, 600-token cap.
        temperature=float(os.environ.get("OPENROUTER_TEMPERATURE", "0.0")),
        max_tokens=int(os.environ.get("OPENROUTER_MAX_TOKENS", "600")),
    )
    return OpenRouterPolicy(config=config)
42
+
43
+
44
def summarize_traces(traces) -> dict[str, dict[str, object]]:
    """Condense per-task episode traces into JSON-friendly summary rows.

    Each row carries the completion flag, final score, step count, and
    termination reason of one trace, keyed by task name.
    """
    summary: dict[str, dict[str, object]] = {}
    for task_name, trace in traces.items():
        summary[task_name] = {
            "completed": trace.completed,
            "final_score": trace.final_score,
            "steps": len(trace.steps),
            "termination_reason": trace.termination_reason,
        }
    return summary
54
+
55
+
56
def main() -> None:
    """CLI entry point: run the baseline policy on every seeded task.

    Parses ``--max-steps`` (default 12), runs the policy suite over TASKS,
    and prints a JSON summary of the resulting traces to stdout.
    """
    parser = argparse.ArgumentParser(
        description="Run the required OpenAI-client inference baseline against all seeded tasks."
    )
    parser.add_argument("--max-steps", type=int, default=12)
    args = parser.parse_args()

    policy = build_openai_compatible_policy()
    traces = run_policy_suite(
        policy=policy,
        task_names=TASKS,
        max_steps=args.max_steps,
    )
    summary = summarize_traces(traces)
    print(json.dumps(summary, indent=2))


if __name__ == "__main__":
    main()
run.py CHANGED
@@ -20,10 +20,10 @@ for step in range(10):
20
  print("Reasoning:", decision.reasoning)
21
  print("Action:", decision.action)
22
 
23
- obs, reward = env.step(decision.action)
24
 
25
  print("Reward:", reward)
26
 
27
- if reward.is_done:
28
  print("\nTASK COMPLETE ✅")
29
- break
 
20
  print("Reasoning:", decision.reasoning)
21
  print("Action:", decision.action)
22
 
23
+ obs, reward, done, _ = env.step(decision.action)
24
 
25
  print("Reward:", reward)
26
 
27
+ if done:
28
  print("\nTASK COMPLETE ✅")
29
+ break
src/executive_assistant/config.py CHANGED
@@ -36,13 +36,23 @@ class OpenRouterConfig:
36
  def from_env(cls, env_file: str | Path | None = None) -> "OpenRouterConfig":
37
  if env_file is not None:
38
  load_env_file(env_file)
39
- api_key = os.environ.get("OPENROUTER_API_KEY", "").strip()
 
 
40
  if not api_key:
41
- raise RuntimeError("OPENROUTER_API_KEY is required for OpenRouter model access.")
 
 
42
  return cls(
43
  api_key=api_key,
44
- model_name=os.environ.get("OPENROUTER_MODEL", "google/gemma-4-31b-it"),
45
- base_url=os.environ.get("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1"),
 
 
 
 
 
 
46
  site_url=os.environ.get("OPENROUTER_SITE_URL", "http://localhost:7860"),
47
  app_name=os.environ.get(
48
  "OPENROUTER_APP_NAME",
 
36
  def from_env(cls, env_file: str | Path | None = None) -> "OpenRouterConfig":
37
  if env_file is not None:
38
  load_env_file(env_file)
39
+ api_key = os.environ.get("OPENROUTER_API_KEY", "").strip() or os.environ.get(
40
+ "OPENAI_API_KEY", ""
41
+ ).strip()
42
  if not api_key:
43
+ raise RuntimeError(
44
+ "OPENROUTER_API_KEY or OPENAI_API_KEY is required for model access."
45
+ )
46
  return cls(
47
  api_key=api_key,
48
+ model_name=os.environ.get(
49
+ "OPENROUTER_MODEL",
50
+ os.environ.get("MODEL_NAME", "google/gemma-4-31b-it"),
51
+ ),
52
+ base_url=os.environ.get(
53
+ "OPENROUTER_BASE_URL",
54
+ os.environ.get("API_BASE_URL", "https://openrouter.ai/api/v1"),
55
+ ),
56
  site_url=os.environ.get("OPENROUTER_SITE_URL", "http://localhost:7860"),
57
  app_name=os.environ.get(
58
  "OPENROUTER_APP_NAME",
src/executive_assistant/deployment.py CHANGED
@@ -98,6 +98,10 @@ colorTo: gray
98
  sdk: docker
99
  app_port: {config.app_port}
100
  pinned: false
 
 
 
 
101
  short_description: OpenEnv executive assistant sandbox demo for judges.
102
  ---
103
 
 
98
  sdk: docker
99
  app_port: {config.app_port}
100
  pinned: false
101
+ tags:
102
+ - openenv
103
+ - docker
104
+ - gradio
105
  short_description: OpenEnv executive assistant sandbox demo for judges.
106
  ---
107
 
src/executive_assistant/env.py CHANGED
@@ -33,6 +33,18 @@ class ExecutiveAssistantEnv:
33
  self.step_count = 0
34
  return self.observe()
35
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  def observe(self) -> WorkspaceObservation:
37
  unread = [
38
  EmailSummary(
@@ -58,7 +70,7 @@ class ExecutiveAssistantEnv:
58
  action_history=recent_actions,
59
  )
60
 
61
- def step(self, action: AssistantAction) -> tuple[WorkspaceObservation, TaskReward]:
62
  self.step_count += 1
63
  if action.action_type == "read_email" and action.target_id is not None:
64
  row = self.workspace.read_email(action.target_id)
@@ -111,7 +123,17 @@ class ExecutiveAssistantEnv:
111
  is_done=True,
112
  reasoning=f"{reward.reasoning}; terminated at step budget",
113
  )
114
- return observation, reward
 
 
 
 
 
 
 
 
 
 
115
 
116
  def grade(self) -> TaskReward:
117
  if self.task_name == "easy_deadline_extraction":
 
33
  self.step_count = 0
34
  return self.observe()
35
 
36
def state(self) -> dict[str, object]:
    """Return a JSON-serializable snapshot of the environment state.

    Bundles task metadata, step bookkeeping, the live observation, and the
    workspace snapshot; step() embeds this dict under the ``state`` key of
    its info payload.
    """
    return {
        "task_name": self.task_name,
        "step_count": self.step_count,
        "max_steps": self.max_steps,
        "last_action_status": self.last_action_status,
        # current_email is None until a read_email action populates it
        "current_email": self.current_email.model_dump() if self.current_email else None,
        "search_results": [result.model_dump() for result in self.search_results],
        "observation": self.observe().model_dump(),
        "workspace": self.workspace.snapshot(),
    }
47
+
48
  def observe(self) -> WorkspaceObservation:
49
  unread = [
50
  EmailSummary(
 
70
  action_history=recent_actions,
71
  )
72
 
73
+ def step(self, action: AssistantAction) -> tuple[WorkspaceObservation, TaskReward, bool, dict[str, object]]:
74
  self.step_count += 1
75
  if action.action_type == "read_email" and action.target_id is not None:
76
  row = self.workspace.read_email(action.target_id)
 
123
  is_done=True,
124
  reasoning=f"{reward.reasoning}; terminated at step budget",
125
  )
126
+ done = reward.is_done
127
+ info = {
128
+ "task_name": self.task_name,
129
+ "step_count": self.step_count,
130
+ "max_steps": self.max_steps,
131
+ "status": self.last_action_status,
132
+ "reasoning": reward.reasoning,
133
+ "total_score": reward.total_score,
134
+ "state": self.state(),
135
+ }
136
+ return observation, reward, done, info
137
 
138
  def grade(self) -> TaskReward:
139
  if self.task_name == "easy_deadline_extraction":
src/executive_assistant/runner.py CHANGED
@@ -71,13 +71,13 @@ class EpisodeRunner:
71
  4. Update state and capture the resulting trace record
72
  """
73
  decision = self.policy.choose_action(task_name, observation)
74
- next_observation, reward = env.step(decision.action)
75
  record = EpisodeStepRecord(
76
  step_index=env.step_count,
77
  reasoning=decision.reasoning,
78
  action=decision.action.model_dump(),
79
  observation=next_observation.model_dump(),
80
- snapshot=env.workspace.snapshot(),
81
  reward=reward.model_dump(),
82
  status=next_observation.last_action_status,
83
  )
 
71
  4. Update state and capture the resulting trace record
72
  """
73
  decision = self.policy.choose_action(task_name, observation)
74
+ next_observation, reward, done, info = env.step(decision.action)
75
  record = EpisodeStepRecord(
76
  step_index=env.step_count,
77
  reasoning=decision.reasoning,
78
  action=decision.action.model_dump(),
79
  observation=next_observation.model_dump(),
80
+ snapshot=info["state"]["workspace"],
81
  reward=reward.model_dump(),
82
  status=next_observation.last_action_status,
83
  )
src/executive_assistant/training.py CHANGED
@@ -302,7 +302,7 @@ def train_q_learning(
302
  state = encode_observation(task_name, observation)
303
  decision = learner.choose_action(task_name, observation)
304
  action_name = action_name_from_decision(decision, observation)
305
- next_observation, reward = env.step(decision.action)
306
  next_state = encode_observation(task_name, next_observation)
307
  reward_delta = reward.total_score - previous_total_score - 0.01
308
  previous_total_score = reward.total_score
 
302
  state = encode_observation(task_name, observation)
303
  decision = learner.choose_action(task_name, observation)
304
  action_name = action_name_from_decision(decision, observation)
305
+ next_observation, reward, _, _ = env.step(decision.action)
306
  next_state = encode_observation(task_name, next_observation)
307
  reward_delta = reward.total_score - previous_total_score - 0.01
308
  previous_total_score = reward.total_score
tests/test_agent.py CHANGED
@@ -73,8 +73,8 @@ def test_openrouter_policy_sanitizes_hard_reply_payload() -> None:
73
  )
74
  env = ExecutiveAssistantEnv(task_name="hard_rag_reply")
75
  observation = env.reset()
76
- observation, _ = env.step(AssistantAction(action_type="read_email", target_id=1))
77
- observation, _ = env.step(AssistantAction(action_type="search_files", payload="Q3 Architecture"))
78
  decision = policy.choose_action("hard_rag_reply", observation)
79
  assert decision.action.payload is not None
80
  assert decision.action.payload.lower().startswith("hello")
@@ -123,7 +123,7 @@ def test_openrouter_policy_normalizes_easy_todo_payload() -> None:
123
  )
124
  env = ExecutiveAssistantEnv(task_name="easy_deadline_extraction")
125
  observation = env.reset()
126
- observation, _ = env.step(AssistantAction(action_type="read_email", target_id=1))
127
  decision = policy.choose_action("easy_deadline_extraction", observation)
128
  assert decision.action.payload == "Proposal Due"
129
  assert decision.action.secondary_payload == "2026-04-10"
@@ -148,10 +148,10 @@ def test_openrouter_policy_repairs_medium_forward_fields() -> None:
148
  )
149
  env = ExecutiveAssistantEnv(task_name="medium_triage_and_negotiation")
150
  observation = env.reset()
151
- observation, _ = env.step(AssistantAction(action_type="archive", target_id=1))
152
- observation, _ = env.step(AssistantAction(action_type="archive", target_id=2))
153
- observation, _ = env.step(AssistantAction(action_type="archive", target_id=3))
154
- observation, _ = env.step(AssistantAction(action_type="read_email", target_id=4))
155
  decision = policy.choose_action("medium_triage_and_negotiation", observation)
156
  assert decision.action.target_id == 4
157
  assert decision.action.secondary_payload == "manager@company.com"
 
73
  )
74
  env = ExecutiveAssistantEnv(task_name="hard_rag_reply")
75
  observation = env.reset()
76
+ observation, _, _, _ = env.step(AssistantAction(action_type="read_email", target_id=1))
77
+ observation, _, _, _ = env.step(AssistantAction(action_type="search_files", payload="Q3 Architecture"))
78
  decision = policy.choose_action("hard_rag_reply", observation)
79
  assert decision.action.payload is not None
80
  assert decision.action.payload.lower().startswith("hello")
 
123
  )
124
  env = ExecutiveAssistantEnv(task_name="easy_deadline_extraction")
125
  observation = env.reset()
126
+ observation, _, _, _ = env.step(AssistantAction(action_type="read_email", target_id=1))
127
  decision = policy.choose_action("easy_deadline_extraction", observation)
128
  assert decision.action.payload == "Proposal Due"
129
  assert decision.action.secondary_payload == "2026-04-10"
 
148
  )
149
  env = ExecutiveAssistantEnv(task_name="medium_triage_and_negotiation")
150
  observation = env.reset()
151
+ observation, _, _, _ = env.step(AssistantAction(action_type="archive", target_id=1))
152
+ observation, _, _, _ = env.step(AssistantAction(action_type="archive", target_id=2))
153
+ observation, _, _, _ = env.step(AssistantAction(action_type="archive", target_id=3))
154
+ observation, _, _, _ = env.step(AssistantAction(action_type="read_email", target_id=4))
155
  decision = policy.choose_action("medium_triage_and_negotiation", observation)
156
  assert decision.action.target_id == 4
157
  assert decision.action.secondary_payload == "manager@company.com"
tests/test_env.py CHANGED
@@ -11,7 +11,7 @@ def test_easy_env_reset_exposes_seeded_email() -> None:
11
  def test_easy_env_can_add_todo() -> None:
12
  env = ExecutiveAssistantEnv(task_name="easy_deadline_extraction")
13
  env.reset()
14
- observation, reward = env.step(
15
  AssistantAction(
16
  action_type="add_todo",
17
  payload="Proposal due",
@@ -20,12 +20,14 @@ def test_easy_env_can_add_todo() -> None:
20
  )
21
  assert "Proposal due" in observation.active_todos
22
  assert reward.total_score >= 0.0
 
 
23
 
24
 
25
  def test_read_email_populates_current_email() -> None:
26
  env = ExecutiveAssistantEnv(task_name="easy_deadline_extraction")
27
  observation = env.reset()
28
- observation, _ = env.step(
29
  AssistantAction(action_type="read_email", target_id=observation.unread_emails[0].id)
30
  )
31
  assert observation.current_email is not None
@@ -35,6 +37,15 @@ def test_read_email_populates_current_email() -> None:
35
  def test_search_files_populates_results() -> None:
36
  env = ExecutiveAssistantEnv(task_name="hard_rag_reply")
37
  env.reset()
38
- observation, _ = env.step(AssistantAction(action_type="search_files", payload="Q3 Architecture"))
39
  assert observation.search_results
40
  assert observation.search_results[0].filename == "Q3_Architecture_Report.txt"
 
 
 
 
 
 
 
 
 
 
11
  def test_easy_env_can_add_todo() -> None:
12
  env = ExecutiveAssistantEnv(task_name="easy_deadline_extraction")
13
  env.reset()
14
+ observation, reward, done, info = env.step(
15
  AssistantAction(
16
  action_type="add_todo",
17
  payload="Proposal due",
 
20
  )
21
  assert "Proposal due" in observation.active_todos
22
  assert reward.total_score >= 0.0
23
+ assert done is False
24
+ assert info["task_name"] == "easy_deadline_extraction"
25
 
26
 
27
  def test_read_email_populates_current_email() -> None:
28
  env = ExecutiveAssistantEnv(task_name="easy_deadline_extraction")
29
  observation = env.reset()
30
+ observation, _, _, _ = env.step(
31
  AssistantAction(action_type="read_email", target_id=observation.unread_emails[0].id)
32
  )
33
  assert observation.current_email is not None
 
37
  def test_search_files_populates_results() -> None:
38
  env = ExecutiveAssistantEnv(task_name="hard_rag_reply")
39
  env.reset()
40
+ observation, _, _, _ = env.step(AssistantAction(action_type="search_files", payload="Q3 Architecture"))
41
  assert observation.search_results
42
  assert observation.search_results[0].filename == "Q3_Architecture_Report.txt"
43
+
44
+
45
def test_state_returns_workspace_snapshot() -> None:
    """state() should expose the task name and a workspace snapshot containing emails."""
    env = ExecutiveAssistantEnv(task_name="medium_triage_and_negotiation")
    env.reset()
    snapshot = env.state()
    assert snapshot["task_name"] == "medium_triage_and_negotiation"
    assert "workspace" in snapshot
    assert "emails" in snapshot["workspace"]
tests/test_inference.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ from inference import build_openai_compatible_policy
4
+ from src.executive_assistant.config import OpenRouterConfig
5
+
6
+
7
def test_openrouter_config_accepts_hackathon_env_names(monkeypatch) -> None:
    """from_env should fall back to the hackathon variable names when OPENROUTER_* are unset."""
    for stale in ("OPENROUTER_API_KEY", "OPENROUTER_BASE_URL", "OPENROUTER_MODEL"):
        monkeypatch.delenv(stale, raising=False)
    expected = {
        "OPENAI_API_KEY": "test-key",
        "API_BASE_URL": "https://openrouter.ai/api/v1",
        "MODEL_NAME": "google/gemma-4-31b-it",
    }
    for name, value in expected.items():
        monkeypatch.setenv(name, value)
    config = OpenRouterConfig.from_env()
    assert config.api_key == expected["OPENAI_API_KEY"]
    assert config.base_url == expected["API_BASE_URL"]
    assert config.model_name == expected["MODEL_NAME"]
18
+
19
+
20
def test_inference_builds_openai_compatible_policy(monkeypatch) -> None:
    """build_openai_compatible_policy wires the hackathon env vars into the policy config."""
    for stale in ("OPENROUTER_API_KEY", "OPENROUTER_BASE_URL", "OPENROUTER_MODEL"):
        monkeypatch.delenv(stale, raising=False)
    expected = {
        "OPENAI_API_KEY": "test-key",
        "API_BASE_URL": "https://openrouter.ai/api/v1",
        "MODEL_NAME": "google/gemma-4-31b-it",
    }
    for name, value in expected.items():
        monkeypatch.setenv(name, value)
    policy = build_openai_compatible_policy()
    assert policy.config.api_key == expected["OPENAI_API_KEY"]
    assert policy.config.base_url == expected["API_BASE_URL"]
    assert policy.config.model_name == expected["MODEL_NAME"]