morty649 committed on
Commit
a6eaeaf
·
2 Parent(s): eff241c8068223

Merge hf/main and keep hackathon proxy fix

Browse files
Dockerfile CHANGED
@@ -23,6 +23,6 @@ ENV PATH="/app/env/.venv/bin:$PATH"
23
  ENV PYTHONPATH="/app/env:$PYTHONPATH"
24
 
25
  HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
26
- CMD curl -f http://localhost:8000/health || exit 1
27
 
28
- CMD ["uv", "run", "python", "-m", "uvicorn", "server.app:app", "--host", "0.0.0.0", "--port", "8000"]
 
23
  ENV PYTHONPATH="/app/env:$PYTHONPATH"
24
 
25
  HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
26
+ CMD sh -c 'curl -f "http://localhost:${PORT:-7860}/health" || exit 1'
27
 
28
+ CMD ["sh", "-c", "exec uv run python -m uvicorn server.app:app --host 0.0.0.0 --port ${PORT:-7860}"]
README.md CHANGED
@@ -1,3 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
1
  # Smart Calendar Resolver — OpenEnv Environment
2
 
3
  A deterministic, multi-step OpenEnv environment for evaluating agent reasoning in real-world scheduling workflows.
 
1
+ ---
2
+ title: Smart Calendar Resolver
3
+ emoji: 📅
4
+ colorFrom: blue
5
+ colorTo: green
6
+ sdk: docker
7
+ app_file: app.py
8
+ pinned: false
9
+ ---
10
+
11
+
12
  # Smart Calendar Resolver — OpenEnv Environment
13
 
14
  A deterministic, multi-step OpenEnv environment for evaluating agent reasoning in real-world scheduling workflows.
calender_en/__pycache__/inference.cpython-314.pyc CHANGED
Binary files a/calender_en/__pycache__/inference.cpython-314.pyc and b/calender_en/__pycache__/inference.cpython-314.pyc differ
 
calender_en/inference.py CHANGED
@@ -2,84 +2,176 @@
2
 
3
  import json
4
  import os
5
- from typing import Any, List
 
 
 
 
6
 
7
  try:
8
- from calender_en.models import CalenderEnAction
 
9
  from calender_en.server.calender_en_environment import CalenderEnEnvironment
10
  except ModuleNotFoundError:
11
- from models import CalenderEnAction
 
12
  from server.calender_en_environment import CalenderEnEnvironment
13
 
14
- from openai import OpenAI
15
-
16
  TASK_NAME = "smart_calendar_resolution"
17
  ENV_NAME = "calender_en"
18
  DEFAULT_MODEL_NAME = "deterministic-baseline"
19
 
20
 
21
- def _policy() -> List[CalenderEnAction]:
22
- return [
23
- CalenderEnAction(
24
- stage="understand_request",
25
- final_note="Identify the meeting objective, participants, and deadline.",
26
- ),
27
- CalenderEnAction(
28
- stage="evaluate_availability",
29
- final_note="Intersect participant availability and filter slots before the deadline.",
30
- ),
31
- CalenderEnAction(
32
- stage="propose_slot",
33
- proposed_time_slot="2026-04-08 10:00-10:30 UTC",
34
- final_note="Choose the earliest common 30 minute slot before the deadline.",
35
- ),
36
- CalenderEnAction(
37
- stage="confirm_schedule",
38
- proposed_time_slot="2026-04-08 10:00-10:30 UTC",
39
- confirm_schedule=True,
40
- final_note="Confirmed with all participants and calendar invite is ready.",
41
- ),
42
- ]
43
 
 
 
44
 
45
- def _env(name: str) -> str | None:
46
- value = os.getenv(name)
47
- if value is None:
 
 
 
 
 
 
48
  return None
49
- value = value.strip()
50
- return value or None
51
 
52
 
53
- def _should_use_llm_proxy() -> bool:
54
- return _env("API_BASE_URL") is not None or _env("API_KEY") is not None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
 
 
 
 
56
 
57
- def _get_model_name() -> str:
58
- return _env("MODEL_NAME") or DEFAULT_MODEL_NAME
59
 
 
 
60
 
61
- def _create_client() -> OpenAI:
62
- api_base_url = _env("API_BASE_URL")
63
- api_key = _env("API_KEY")
64
- model_name = _env("MODEL_NAME")
65
 
 
66
  missing = [
67
  name
68
  for name, value in (
69
- ("API_BASE_URL", api_base_url),
70
- ("API_KEY", api_key),
71
- ("MODEL_NAME", model_name),
72
  )
73
- if value is None
74
  ]
75
  if missing:
76
- missing_text = ", ".join(missing)
77
  raise RuntimeError(
78
  "Missing required hackathon environment variables: "
79
- f"{missing_text}. The validator injects these automatically."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  )
81
 
82
- return OpenAI(base_url=api_base_url, api_key=api_key)
 
 
 
 
 
83
 
84
 
85
  def _extract_message_text(response: Any) -> str:
@@ -116,7 +208,8 @@ def _parse_action_json(payload: str) -> CalenderEnAction:
116
 
117
  def _generate_action_with_proxy(
118
  client: OpenAI,
119
- observation: Any,
 
120
  planned_action: CalenderEnAction,
121
  ) -> CalenderEnAction:
122
  prompt_payload = {
@@ -129,7 +222,7 @@ def _generate_action_with_proxy(
129
  "planned_action": planned_action.model_dump(),
130
  }
131
  response = client.chat.completions.create(
132
- model=_get_model_name(),
133
  temperature=0,
134
  messages=[
135
  {
@@ -160,51 +253,50 @@ def _format_action(action: CalenderEnAction) -> str:
160
 
161
 
162
  def main() -> None:
163
- env = CalenderEnEnvironment()
 
164
  rewards: List[str] = []
165
  steps = 0
166
  success = False
167
- use_llm_proxy = _should_use_llm_proxy()
168
- client = _create_client() if use_llm_proxy else None
169
- model_name = _get_model_name()
170
 
171
- print(f"[START] task={TASK_NAME} env={ENV_NAME} model={model_name}")
172
 
173
  try:
174
  observation = env.reset()
175
- for planned_action in _policy():
176
- steps += 1
177
- error = "null"
178
  action = planned_action
 
179
  try:
180
- action = (
181
- _generate_action_with_proxy(client, observation, planned_action)
182
- if client is not None
183
- else planned_action
184
- )
185
- observation = env.step(action)
186
- reward_text = f"{observation.reward:.2f}"
187
- done_text = str(observation.done).lower()
188
  rewards.append(reward_text)
189
  print(
190
  f"[STEP] step={steps} action={_format_action(action)} "
191
- f"reward={reward_text} done={done_text} error={error}"
192
  )
193
- success = observation.done
194
  except Exception as exc:
195
- reward_text = "0.00"
196
- rewards.append(reward_text)
197
  print(
198
  f"[STEP] step={steps} action={_format_action(action)} "
199
- f"reward={reward_text} done=false error={str(exc)}"
200
  )
201
  success = False
202
  break
203
  except Exception:
204
  success = False
205
  finally:
206
- rewards_text = ",".join(rewards)
207
- print(f"[END] success={str(success).lower()} steps={steps} rewards={rewards_text}")
 
 
208
 
209
 
210
  if __name__ == "__main__":
 
2
 
3
  import json
4
  import os
5
+ from dataclasses import dataclass, field
6
+ from datetime import datetime
7
+ from typing import Any, List, Optional
8
+
9
+ from openai import OpenAI
10
 
11
  try:
12
+ from calender_en.client import CalenderEnEnv
13
+ from calender_en.models import CalenderEnAction, CalenderEnObservation
14
  from calender_en.server.calender_en_environment import CalenderEnEnvironment
15
  except ModuleNotFoundError:
16
+ from client import CalenderEnEnv
17
+ from models import CalenderEnAction, CalenderEnObservation
18
  from server.calender_en_environment import CalenderEnEnvironment
19
 
 
 
20
  TASK_NAME = "smart_calendar_resolution"
21
  ENV_NAME = "calender_en"
22
  DEFAULT_MODEL_NAME = "deterministic-baseline"
23
 
24
 
25
def _env_text(name: str) -> str:
    """Return environment variable *name* with surrounding whitespace removed.

    Unset, empty, and whitespace-only values all yield ``""``.
    """
    return (os.getenv(name) or "").strip()


@dataclass
class InferenceConfig:
    """Runtime settings for the inference script, read from the environment.

    Every field is resolved when an instance is created (via
    ``default_factory``), so environment changes made before construction
    are picked up. Blank or whitespace-only variables are treated as unset,
    so an empty ``MODEL_NAME`` falls back to ``DEFAULT_MODEL_NAME`` and a
    whitespace-only ``API_KEY`` does not count as configured.
    """

    # ENV_BASE_URL: when non-empty, a remote environment is used by _runner.
    env_base_url: str = field(default_factory=lambda: _env_text("ENV_BASE_URL"))
    # API_BASE_URL: base URL handed to the OpenAI client.
    llm_api_base_url: str = field(default_factory=lambda: _env_text("API_BASE_URL"))
    # API_KEY: credential handed to the OpenAI client.
    llm_api_key: str = field(default_factory=lambda: _env_text("API_KEY"))
    # MODEL_NAME: model identifier; defaults to DEFAULT_MODEL_NAME when blank.
    model_name: str = field(
        default_factory=lambda: _env_text("MODEL_NAME") or DEFAULT_MODEL_NAME
    )
33
+
34
+
35
@dataclass
class StepOutcome:
    """Normalized result of one environment step.

    Both runner classes in this module return this shape from ``step`` so the
    main loop does not care whether the environment is local or remote.
    """

    # Observation produced by the step.
    observation: CalenderEnObservation
    # Reward for the step, already coerced to ``float`` by the runner.
    reward: float
    # Whether the runner reported the episode as finished.
    done: bool
40
+
41
+
42
class LocalEnvRunner:
    """Drive an in-process ``CalenderEnEnvironment`` instance."""

    def __init__(self) -> None:
        self._env = CalenderEnEnvironment()

    def reset(self) -> CalenderEnObservation:
        """Reset the wrapped environment and return its observation."""
        return self._env.reset()

    def step(self, action: CalenderEnAction) -> StepOutcome:
        """Apply *action* and wrap the resulting observation in a ``StepOutcome``."""
        observation = self._env.step(action)
        return StepOutcome(
            observation=observation,
            # A missing (None) reward counts as 0.0, matching RemoteEnvRunner,
            # instead of raising TypeError from float(None).
            reward=float(observation.reward or 0.0),
            done=bool(observation.done),
        )

    def close(self) -> None:
        """No-op; present so both runners expose the same interface."""
        return None
 
 
59
 
60
 
61
class RemoteEnvRunner:
    """Drive an environment through the ``CalenderEnEnv`` client."""

    def __init__(self, base_url: str) -> None:
        self._client = CalenderEnEnv(base_url=base_url)

    def reset(self) -> CalenderEnObservation:
        """Reset via the client and return the observation from its result."""
        reset_result = self._client.reset()
        return reset_result.observation

    def step(self, action: CalenderEnAction) -> StepOutcome:
        """Send *action* through the client and normalize the result."""
        step_result = self._client.step(action)
        outcome_fields = {
            "observation": step_result.observation,
            # A falsy reward (including None) is reported as 0.0.
            "reward": float(step_result.reward or 0.0),
            "done": bool(step_result.done),
        }
        return StepOutcome(**outcome_fields)

    def close(self) -> None:
        """Close the underlying client."""
        self._client.close()
78
+
79
 
80
def _runner(config: InferenceConfig) -> LocalEnvRunner | RemoteEnvRunner:
    """Pick the environment runner for *config*.

    A non-empty ``env_base_url`` selects ``RemoteEnvRunner``; otherwise a
    ``LocalEnvRunner`` is created.
    """
    remote_url = config.env_base_url
    return RemoteEnvRunner(remote_url) if remote_url else LocalEnvRunner()
84
 
 
 
85
 
86
+ def _should_use_llm_proxy(config: InferenceConfig) -> bool:
87
+ return bool(config.llm_api_base_url or config.llm_api_key)
88
 
 
 
 
 
89
 
90
+ def _create_proxy_client(config: InferenceConfig) -> OpenAI:
91
  missing = [
92
  name
93
  for name, value in (
94
+ ("API_BASE_URL", config.llm_api_base_url),
95
+ ("API_KEY", config.llm_api_key),
96
+ ("MODEL_NAME", config.model_name),
97
  )
98
+ if not value
99
  ]
100
  if missing:
 
101
  raise RuntimeError(
102
  "Missing required hackathon environment variables: "
103
+ + ", ".join(missing)
104
+ )
105
+ return OpenAI(base_url=config.llm_api_base_url, api_key=config.llm_api_key)
106
+
107
+
108
+ def _common_slots(observation: CalenderEnObservation) -> List[str]:
109
+ participant_slots = [set(slots) for slots in observation.availability.values()]
110
+ if not participant_slots:
111
+ raise ValueError("Observation does not include participant availability.")
112
+ return sorted(set.intersection(*participant_slots))
113
+
114
+
115
+ def _parse_slot_start(slot: str) -> datetime:
116
+ return datetime.strptime(slot[:16], "%Y-%m-%d %H:%M")
117
+
118
+
119
+ def _parse_deadline(deadline: str) -> datetime:
120
+ normalized = deadline.replace(" UTC", "")
121
+ for fmt in ("%Y-%m-%d %H:%M", "%Y-%m-%d"):
122
+ try:
123
+ return datetime.strptime(normalized, fmt)
124
+ except ValueError:
125
+ continue
126
+ raise ValueError(f"Unsupported deadline format: {deadline}")
127
+
128
+
129
def _select_slot(observation: CalenderEnObservation) -> str:
    """Choose the slot to propose for *observation*.

    Returns the first common slot (in sorted order) whose start time is at or
    before the ``deadline`` constraint. When no deadline is given, or no
    common slot starts by the deadline, the first common slot is returned as
    a best-effort fallback.

    Raises:
        ValueError: if the participants share no slot at all (previously a
            bare ``IndexError``), or if a slot or the deadline cannot be
            parsed.
    """
    common_slots = _common_slots(observation)
    if not common_slots:
        raise ValueError("Participants have no common availability slot.")

    deadline = observation.constraints.get("deadline")
    if deadline:
        cutoff = _parse_deadline(deadline)
        valid_slots = [
            slot for slot in common_slots if _parse_slot_start(slot) <= cutoff
        ]
        if valid_slots:
            return valid_slots[0]
    # Best-effort: nothing satisfies the deadline (or none was set).
    return common_slots[0]
138
+
139
+
140
+ def _fallback_note(stage: str) -> str:
141
+ notes = {
142
+ "understand_request": "Identify the meeting objective, participants, and deadline.",
143
+ "evaluate_availability": "Intersect participant availability and filter slots before the deadline.",
144
+ "propose_slot": "Choose the earliest common 30 minute slot before the deadline.",
145
+ "confirm_schedule": "Confirmed with all participants and calendar invite is ready.",
146
+ }
147
+ return notes[stage]
148
+
149
+
150
def _planned_action(observation: CalenderEnObservation) -> CalenderEnAction:
    """Build the deterministic action for the stage the observation expects.

    The two analysis stages carry only a note. ``propose_slot`` adds the
    selected slot, and every remaining stage also sets
    ``confirm_schedule=True``.

    Raises:
        ValueError: if ``observation.next_expected_stage`` is ``None``.
    """
    stage = observation.next_expected_stage
    if stage is None:
        raise ValueError("No next stage available.")

    if stage in ("understand_request", "evaluate_availability"):
        return CalenderEnAction(stage=stage, final_note=_fallback_note(stage))

    # The slot is resolved before the note, as in the staged branches above.
    action_fields = {"stage": stage, "proposed_time_slot": _select_slot(observation)}
    if stage != "propose_slot":
        action_fields["confirm_schedule"] = True
    action_fields["final_note"] = _fallback_note(stage)
    return CalenderEnAction(**action_fields)
175
 
176
 
177
  def _extract_message_text(response: Any) -> str:
 
208
 
209
  def _generate_action_with_proxy(
210
  client: OpenAI,
211
+ config: InferenceConfig,
212
+ observation: CalenderEnObservation,
213
  planned_action: CalenderEnAction,
214
  ) -> CalenderEnAction:
215
  prompt_payload = {
 
222
  "planned_action": planned_action.model_dump(),
223
  }
224
  response = client.chat.completions.create(
225
+ model=config.model_name,
226
  temperature=0,
227
  messages=[
228
  {
 
253
 
254
 
255
  def main() -> None:
256
+ config = InferenceConfig()
257
+ env = _runner(config)
258
  rewards: List[str] = []
259
  steps = 0
260
  success = False
261
+ client = _create_proxy_client(config) if _should_use_llm_proxy(config) else None
 
 
262
 
263
+ print(f"[START] task={TASK_NAME} env={ENV_NAME} model={config.model_name}")
264
 
265
  try:
266
  observation = env.reset()
267
+ while not observation.done and observation.next_expected_stage is not None:
268
+ planned_action = _planned_action(observation)
 
269
  action = planned_action
270
+ steps += 1
271
  try:
272
+ if client is not None:
273
+ action = _generate_action_with_proxy(
274
+ client, config, observation, planned_action
275
+ )
276
+ outcome = env.step(action)
277
+ observation = outcome.observation
278
+ reward_text = f"{outcome.reward:.2f}"
 
279
  rewards.append(reward_text)
280
  print(
281
  f"[STEP] step={steps} action={_format_action(action)} "
282
+ f"reward={reward_text} done={str(outcome.done).lower()} error=null"
283
  )
284
+ success = outcome.done
285
  except Exception as exc:
286
+ rewards.append("0.00")
 
287
  print(
288
  f"[STEP] step={steps} action={_format_action(action)} "
289
+ f"reward=0.00 done=false error={str(exc)}"
290
  )
291
  success = False
292
  break
293
  except Exception:
294
  success = False
295
  finally:
296
+ env.close()
297
+ print(
298
+ f"[END] success={str(success).lower()} steps={steps} rewards={','.join(rewards)}"
299
+ )
300
 
301
 
302
  if __name__ == "__main__":
calender_en/server/__pycache__/app.cpython-314.pyc CHANGED
Binary files a/calender_en/server/__pycache__/app.cpython-314.pyc and b/calender_en/server/__pycache__/app.cpython-314.pyc differ
 
calender_en/server/app.py CHANGED
@@ -71,11 +71,12 @@ def main() -> None:
71
  uvicorn calender_en.server.app:app --workers 4
72
  """
73
  import argparse
 
74
  import uvicorn
75
 
76
  parser = argparse.ArgumentParser()
77
  parser.add_argument("--host", default="0.0.0.0")
78
- parser.add_argument("--port", type=int, default=8000)
79
  args = parser.parse_args()
80
  uvicorn.run(app, host=args.host, port=args.port)
81
 
 
71
  uvicorn calender_en.server.app:app --workers 4
72
  """
73
  import argparse
74
+ import os
75
  import uvicorn
76
 
77
  parser = argparse.ArgumentParser()
78
  parser.add_argument("--host", default="0.0.0.0")
79
+ parser.add_argument("--port", type=int, default=int(os.getenv("PORT", "7860")))
80
  args = parser.parse_args()
81
  uvicorn.run(app, host=args.host, port=args.port)
82
 
server/app.py CHANGED
@@ -1,6 +1,11 @@
1
  from calender_en.server.app import app, main as package_main
2
 
3
 
 
 
 
 
 
4
  def main() -> None:
5
  package_main()
6
 
 
1
  from calender_en.server.app import app, main as package_main
2
 
3
 
4
@app.get("/")
def root():
    # Landing route: returns a fixed status message for the app's root path.
    status_payload = {"message": "Smart Calendar Resolver is running"}
    return status_payload
7
+
8
+
9
  def main() -> None:
10
  package_main()
11
 
tests/__pycache__/test_inference.cpython-314-pytest-9.0.2.pyc CHANGED
Binary files a/tests/__pycache__/test_inference.cpython-314-pytest-9.0.2.pyc and b/tests/__pycache__/test_inference.cpython-314-pytest-9.0.2.pyc differ
 
tests/test_inference.py CHANGED
@@ -1,5 +1,6 @@
1
  from contextlib import redirect_stdout
2
  from io import StringIO
 
3
 
4
  from calender_en import inference
5
 
@@ -13,7 +14,10 @@ def test_inference_runs_end_to_end_without_crashing() -> None:
13
  rendered = output.getvalue()
14
  lines = rendered.strip().splitlines()
15
  assert len(lines) == 6
16
- assert lines[0] == "[START] task=smart_calendar_resolution env=calender_en model=deterministic-baseline"
 
 
 
17
  assert lines[-1] == "[END] success=true steps=4 rewards=1.00,1.50,4.00,3.00"
18
  assert all(line.startswith("[STEP]") for line in lines[1:5])
19
 
@@ -30,26 +34,39 @@ def test_inference_output_is_deterministic() -> None:
30
  assert first.getvalue() == second.getvalue()
31
 
32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  def test_inference_uses_hackathon_proxy_env_vars(monkeypatch) -> None:
34
  captured: dict[str, object] = {}
35
 
36
  class FakeResponse:
37
- class Choice:
38
  class Message:
39
- content = (
40
- '{"stage":"understand_request","proposed_time_slot":null,'
41
- '"confirm_schedule":false,'
42
- '"final_note":"Identify the meeting objective, participants, and deadline."}'
43
- )
44
 
45
- message = Message()
 
 
46
 
47
- choices = [Choice()]
48
 
49
  class FakeCompletions:
50
  def create(self, **kwargs):
51
  captured["request"] = kwargs
52
- return FakeResponse()
 
53
 
54
  class FakeChat:
55
  completions = FakeCompletions()
@@ -64,16 +81,6 @@ def test_inference_uses_hackathon_proxy_env_vars(monkeypatch) -> None:
64
  monkeypatch.setenv("API_KEY", "proxy-test-key")
65
  monkeypatch.setenv("MODEL_NAME", "meta-hackathon-model")
66
  monkeypatch.setattr(inference, "OpenAI", FakeOpenAI)
67
- monkeypatch.setattr(
68
- inference,
69
- "_policy",
70
- lambda: [
71
- inference.CalenderEnAction(
72
- stage="understand_request",
73
- final_note="Identify the meeting objective, participants, and deadline.",
74
- )
75
- ],
76
- )
77
 
78
  output = StringIO()
79
  with redirect_stdout(output):
 
1
  from contextlib import redirect_stdout
2
  from io import StringIO
3
+ import json
4
 
5
  from calender_en import inference
6
 
 
14
  rendered = output.getvalue()
15
  lines = rendered.strip().splitlines()
16
  assert len(lines) == 6
17
+ assert (
18
+ lines[0]
19
+ == "[START] task=smart_calendar_resolution env=calender_en model=deterministic-baseline"
20
+ )
21
  assert lines[-1] == "[END] success=true steps=4 rewards=1.00,1.50,4.00,3.00"
22
  assert all(line.startswith("[STEP]") for line in lines[1:5])
23
 
 
34
  assert first.getvalue() == second.getvalue()
35
 
36
 
37
+ def test_inference_reads_model_name_from_environment(monkeypatch) -> None:
38
+ output = StringIO()
39
+ monkeypatch.setenv("MODEL_NAME", "hf-eval-check")
40
+
41
+ with redirect_stdout(output):
42
+ inference.main()
43
+
44
+ lines = output.getvalue().strip().splitlines()
45
+ assert lines[0] == (
46
+ "[START] task=smart_calendar_resolution env=calender_en model=hf-eval-check"
47
+ )
48
+
49
+
50
  def test_inference_uses_hackathon_proxy_env_vars(monkeypatch) -> None:
51
  captured: dict[str, object] = {}
52
 
53
  class FakeResponse:
54
+ def __init__(self, content: str):
55
  class Message:
56
+ def __init__(self, value: str):
57
+ self.content = value
 
 
 
58
 
59
+ class Choice:
60
+ def __init__(self, value: str):
61
+ self.message = Message(value)
62
 
63
+ self.choices = [Choice(content)]
64
 
65
  class FakeCompletions:
66
  def create(self, **kwargs):
67
  captured["request"] = kwargs
68
+ payload = json.loads(kwargs["messages"][1]["content"])
69
+ return FakeResponse(json.dumps(payload["planned_action"]))
70
 
71
  class FakeChat:
72
  completions = FakeCompletions()
 
81
  monkeypatch.setenv("API_KEY", "proxy-test-key")
82
  monkeypatch.setenv("MODEL_NAME", "meta-hackathon-model")
83
  monkeypatch.setattr(inference, "OpenAI", FakeOpenAI)
 
 
 
 
 
 
 
 
 
 
84
 
85
  output = StringIO()
86
  with redirect_stdout(output):
uv.lock CHANGED
@@ -263,12 +263,14 @@ name = "calender-env-v1"
263
  version = "0.1.0"
264
  source = { virtual = "." }
265
  dependencies = [
 
266
  { name = "openenv-core", extra = ["core"] },
267
  { name = "pytest" },
268
  ]
269
 
270
  [package.metadata]
271
  requires-dist = [
 
272
  { name = "openenv-core", extras = ["core"], specifier = ">=0.2.2" },
273
  { name = "pytest", specifier = ">=8.0.0" },
274
  ]
 
263
  version = "0.1.0"
264
  source = { virtual = "." }
265
  dependencies = [
266
+ { name = "openai" },
267
  { name = "openenv-core", extra = ["core"] },
268
  { name = "pytest" },
269
  ]
270
 
271
  [package.metadata]
272
  requires-dist = [
273
+ { name = "openai", specifier = ">=2.30.0" },
274
  { name = "openenv-core", extras = ["core"], specifier = ">=0.2.2" },
275
  { name = "pytest", specifier = ">=8.0.0" },
276
  ]