Shabista Sehar committed on
Commit
7a316cd
·
2 Parent(s): 9e8a491, ba1d0d4
Files changed (8) hide show
  1. .env.example +1 -1
  2. .gitattributes +35 -0
  3. README.md +10 -0
  4. __init__.py +3 -0
  5. client.py +11 -0
  6. inference.py +88 -106
  7. server/environment.py +1 -1
  8. validate-submission.sh +33 -0
.env.example CHANGED
@@ -1,4 +1,4 @@
1
- # Container Port OpenEnv Environment Variables
2
  # Copy this file and fill in your actual values before running inference
3
  HF_TOKEN=your_huggingface_token_here
4
  API_BASE_URL=https://router.huggingface.co/v1
 
1
+ # Container Port OpenEnv - Environment Variables
2
  # Copy this file and fill in your actual values before running inference
3
  HF_TOKEN=your_huggingface_token_here
4
  API_BASE_URL=https://router.huggingface.co/v1
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,3 +1,13 @@
 
 
 
 
 
 
 
 
 
 
1
  # Container Port Environment
2
 
3
  An OpenEnv environment for container-yard stack planning at a shipping terminal.
 
1
+ ---
2
+ title: Container Port
3
+ colorFrom: gray
4
+ colorTo: blue
5
+ sdk: docker
6
+ pinned: false
7
+ license: mit
8
+ short_description: A simulation-based RL environment for container management
9
+ ---
10
+
11
  # Container Port Environment
12
 
13
  An OpenEnv environment for container-yard stack planning at a shipping terminal.
__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from models import ContainerAction, ContainerObservation
2
+
3
+ __all__ = ["ContainerAction", "ContainerObservation"]
client.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from openenv.core.env_client import EnvClient
2
+
3
+ from models import ContainerAction, ContainerObservation
4
+
5
+
6
+ class ContainerPortEnv(EnvClient[ContainerAction, ContainerObservation]):
7
+ action_cls = ContainerAction
8
+ observation_cls = ContainerObservation
9
+
10
+
11
+ __all__ = ["ContainerPortEnv"]
inference.py CHANGED
@@ -1,6 +1,6 @@
1
- #!/usr/bin/env python3
2
  """
3
- Container Port OpenEnv Baseline Inference Script
4
  SST x Meta PyTorch OpenEnv Hackathon 2026
5
 
6
  Stdout format (grader parses these exactly):
@@ -16,81 +16,78 @@ Usage:
16
  python inference.py --url https://YOUR_USERNAME-container-port-env.hf.space
17
  """
18
 
 
19
  import asyncio
 
20
  import os
21
  import sys
22
- import json
23
  from typing import List, Optional
 
24
  from openai import OpenAI
25
 
26
- # Load .env file if present (before os.getenv calls)
27
- def _load_dotenv():
28
- env_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), ".env")
29
  if os.path.exists(env_path):
30
- with open(env_path) as f:
31
  for line in f:
32
  line = line.strip()
33
- if not line or line.startswith("#") or "=" not in line:
34
  continue
35
- key, _, value = line.partition("=")
36
  key = key.strip()
37
  value = value.strip().strip('"').strip("'")
38
  if key and key not in os.environ:
39
  os.environ[key] = value
40
 
 
41
  _load_dotenv()
42
 
43
- # Required configuration variables
44
- API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
45
- MODEL_NAME = os.getenv("MODEL_NAME", "meta-llama/Llama-3.1-8B-Instruct")
46
- HF_TOKEN = os.getenv("HF_TOKEN")
47
- LOCAL_IMAGE_NAME = os.getenv("LOCAL_IMAGE_NAME")
48
- API_KEY = HF_TOKEN or os.getenv("API_KEY")
49
- #
50
-
51
- ENV_URL = os.getenv("ENV_URL", "http://localhost:7860")
52
- TASK_NAME = "container-stacking"
53
- BENCHMARK = "container-port-env"
54
- MAX_STEPS = 200 # hard mode has 50 containers, safety ceiling
55
- SUCCESS_SCORE_THRESHOLD = 0.5
56
 
 
 
 
 
 
57
 
58
- # Logging helpers (exact SST format)
59
 
60
  def log_start(task: str, env: str, model: str) -> None:
61
- print(f"[START] task={task} env={env} model={model}", flush=True)
62
 
63
 
64
  def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]) -> None:
65
- error_val = error if error else "null"
66
  done_val = str(done).lower()
67
  print(
68
- f"[STEP] step={step} action={action} reward={reward:.2f} done={done_val} error={error_val}",
69
  flush=True,
70
  )
71
 
72
 
73
  def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
74
- rewards_str = ",".join(f"{r:.2f}" for r in rewards)
75
  print(
76
- f"[END] success={str(success).lower()} steps={steps} score={score:.3f} rewards={rewards_str}",
77
  flush=True,
78
  )
79
 
80
 
81
- # Agents
82
-
83
  def greedy_decide(obs: dict) -> int:
84
- stacks = obs["stack_states"]
85
- current = obs.get("current_container")
86
- max_height = obs["max_height"]
87
- upcoming = set(obs.get("upcoming_retrievals", []))
88
 
89
  if current is None:
90
  return 0
91
 
92
- cur_priority = current["priority"]
93
- best_stack, best_score = -1, float("-inf")
94
 
95
  for i, stack in enumerate(stacks):
96
  depth = len(stack)
@@ -101,13 +98,13 @@ def greedy_decide(obs: dict) -> int:
101
  score += accessibility * (4 - cur_priority)
102
 
103
  if depth > 0:
104
- top_p = stack[-1]["priority"]
105
  if cur_priority > top_p:
106
  score -= 10.0 * (cur_priority - top_p)
107
  elif cur_priority < top_p:
108
  score += 3.0
109
 
110
- if current["id"] in upcoming:
111
  score += 5.0 * accessibility
112
 
113
  if depth > 0:
@@ -125,34 +122,36 @@ def greedy_decide(obs: dict) -> int:
125
 
126
 
127
  def llm_decide(obs: dict, client: OpenAI) -> int:
128
- stacks = obs["stack_states"]
129
- current = obs.get("current_container")
130
- n_stacks = obs["n_stacks"]
131
- max_height = obs["max_height"]
132
- upcoming = obs.get("upcoming_retrievals", [])
133
- difficulty = obs.get("difficulty", "medium")
134
 
135
  lines = []
136
  for i, stack in enumerate(stacks):
137
  if not stack:
138
- lines.append(f" Stack {i}: EMPTY (0/{max_height})")
139
  else:
140
- contents = ", ".join(f"{c['id']}(p{c['priority']})" for c in stack)
141
  lines.append(
142
- f" Stack {i}: [{contents}] depth={len(stack)}/{max_height},"
143
  f" top=priority-{stack[-1]['priority']}"
144
  )
145
 
146
  prompt = (
147
- f"You are a container yard planner. Minimize rehandle operations.\n"
148
- f"Priority 1=URGENT (retrieved first), 2=Normal, 3=Low.\n"
149
- f"RULE: containers above the target at retrieval = rehandles (costly).\n\n"
150
- f"DIFFICULTY: {difficulty}\n"
151
  f"UPCOMING RETRIEVALS: {upcoming or 'Unknown (hard mode)'}\n\n"
152
  f"CONTAINER TO PLACE: id={current['id']}, priority={current['priority']}, "
153
  f"weight={current['weight']}kg\n\n"
154
- f"STACKS (bottom->top):\n" + "\n".join(lines) + "\n\n"
155
- f"Reply ONLY with valid JSON: {{\"stack_index\": <int 0-{n_stacks-1}>}}"
 
 
156
  )
157
 
158
  try:
@@ -160,40 +159,34 @@ def llm_decide(obs: dict, client: OpenAI) -> int:
160
  model=MODEL_NAME,
161
  max_tokens=64,
162
  temperature=0.0,
163
- messages=[{"role": "user", "content": prompt}],
164
  )
165
- text = resp.choices[0].message.content.strip()
166
- if "```" in text:
167
- text = text.split("```")[1]
168
- if text.startswith("json"):
169
  text = text[4:]
170
  decision = json.loads(text.strip())
171
- idx = int(decision["stack_index"])
172
- if 0 <= idx < n_stacks and len(obs["stack_states"][idx]) < max_height:
173
  return idx
174
- except Exception as e:
175
- print(f"[DEBUG] LLM fallback: {e}", file=sys.stderr, flush=True)
176
 
177
  return greedy_decide(obs)
178
 
179
 
180
- # Episode runner
181
-
182
- async def run_episode(
183
- url: str,
184
- difficulty: str = "medium",
185
- use_llm: bool = False,
186
- ) -> float:
187
  import websockets
188
 
189
- ws_url = url.replace("http://", "ws://").replace("https://", "wss://")
190
- if not ws_url.endswith("/ws"):
191
- ws_url = ws_url.rstrip("/") + "/ws"
192
 
193
  client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY) if use_llm else None
194
- model_label = MODEL_NAME if use_llm else "greedy"
195
 
196
- log_start(task=f"{TASK_NAME}-{difficulty}", env=BENCHMARK, model=model_label)
197
 
198
  rewards: List[float] = []
199
  steps_taken = 0
@@ -202,47 +195,42 @@ async def run_episode(
202
 
203
  try:
204
  async with websockets.connect(ws_url) as ws:
205
- await ws.send(json.dumps({"type": "reset", "data": {"difficulty": difficulty}}))
206
  resp = json.loads(await ws.recv())
207
- payload = resp.get("data", {})
208
- obs = payload.get("observation", payload)
209
 
210
  for step in range(1, MAX_STEPS + 1):
211
- if obs.get("done", False):
212
  break
213
 
214
  action_idx = llm_decide(obs, client) if use_llm else greedy_decide(obs)
215
 
216
- await ws.send(json.dumps({
217
- "type": "step",
218
- "data": {"stack_index": action_idx},
219
- }))
220
  resp = json.loads(await ws.recv())
221
- payload = resp.get("data", {})
222
- obs = payload.get("observation", payload)
223
- reward = float(payload.get("reward", obs.get("last_reward", 0.0)) or obs.get("last_reward", 0.0))
224
- done = payload.get("done", obs.get("done", False))
225
- error = payload.get("error", None)
226
 
227
  rewards.append(reward)
228
  steps_taken = step
229
-
230
  log_step(step=step, action=str(action_idx), reward=reward, done=done, error=error)
231
 
232
  if done:
233
  break
234
 
235
- # Fetch final score
236
- await ws.send(json.dumps({"type": "state"}))
237
  state_resp = json.loads(await ws.recv())
238
- state = state_resp.get("data", {})
239
- score = float(state.get("score", obs.get("score", 0.0)))
240
  score = min(max(score, 0.0), 1.0)
241
 
242
  success = score >= SUCCESS_SCORE_THRESHOLD
243
 
244
  except Exception as exc:
245
- print(f"[DEBUG] Episode error: {exc}", file=sys.stderr, flush=True)
246
 
247
  finally:
248
  log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
@@ -251,24 +239,18 @@ async def run_episode(
251
 
252
 
253
  async def run_all(url: str, use_llm: bool = False) -> None:
254
- for diff in ["easy", "medium", "hard"]:
255
  await run_episode(url, difficulty=diff, use_llm=use_llm)
256
 
257
 
258
- # Entry point
259
-
260
- if __name__ == "__main__":
261
- import argparse
262
-
263
- parser = argparse.ArgumentParser(description="Container Port Baseline Agent")
264
- parser.add_argument("--url", default=ENV_URL)
265
- parser.add_argument("--difficulty", default="all",
266
- choices=["easy", "medium", "hard", "all"])
267
- parser.add_argument("--use-llm", action="store_true",
268
- help="Use LLM agent via HF router (requires HF_TOKEN)")
269
  args = parser.parse_args()
270
 
271
- if args.difficulty == "all":
272
  asyncio.run(run_all(args.url, use_llm=args.use_llm))
273
  else:
274
  asyncio.run(run_episode(args.url, difficulty=args.difficulty, use_llm=args.use_llm))
 
1
+ #!/usr/bin/env python3
2
  """
3
+ Container Port OpenEnv - Baseline Inference Script
4
  SST x Meta PyTorch OpenEnv Hackathon 2026
5
 
6
  Stdout format (grader parses these exactly):
 
16
  python inference.py --url https://YOUR_USERNAME-container-port-env.hf.space
17
  """
18
 
19
+ import argparse
20
  import asyncio
21
+ import json
22
  import os
23
  import sys
 
24
  from typing import List, Optional
25
+
26
  from openai import OpenAI
27
 
28
+
29
+ def _load_dotenv() -> None:
30
+ env_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), '.env')
31
  if os.path.exists(env_path):
32
+ with open(env_path, encoding='utf-8') as f:
33
  for line in f:
34
  line = line.strip()
35
+ if not line or line.startswith('#') or '=' not in line:
36
  continue
37
+ key, _, value = line.partition('=')
38
  key = key.strip()
39
  value = value.strip().strip('"').strip("'")
40
  if key and key not in os.environ:
41
  os.environ[key] = value
42
 
43
+
44
  _load_dotenv()
45
 
46
+ API_BASE_URL = os.getenv('API_BASE_URL', 'https://router.huggingface.co/v1')
47
+ MODEL_NAME = os.getenv('MODEL_NAME', 'meta-llama/Llama-3.1-8B-Instruct')
48
+ HF_TOKEN = os.getenv('HF_TOKEN')
49
+ LOCAL_IMAGE_NAME = os.getenv('LOCAL_IMAGE_NAME')
50
+ API_KEY = HF_TOKEN or os.getenv('API_KEY')
 
 
 
 
 
 
 
 
51
 
52
+ ENV_URL = os.getenv('ENV_URL', 'http://localhost:7860')
53
+ TASK_NAME = 'container-stacking'
54
+ BENCHMARK = 'container-port-env'
55
+ MAX_STEPS = 200
56
+ SUCCESS_SCORE_THRESHOLD = 0.5
57
 
 
58
 
59
  def log_start(task: str, env: str, model: str) -> None:
60
+ print(f'[START] task={task} env={env} model={model}', flush=True)
61
 
62
 
63
  def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]) -> None:
64
+ error_val = error if error else 'null'
65
  done_val = str(done).lower()
66
  print(
67
+ f'[STEP] step={step} action={action} reward={reward:.2f} done={done_val} error={error_val}',
68
  flush=True,
69
  )
70
 
71
 
72
  def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
73
+ rewards_str = ','.join(f'{r:.2f}' for r in rewards)
74
  print(
75
+ f'[END] success={str(success).lower()} steps={steps} score={score:.3f} rewards={rewards_str}',
76
  flush=True,
77
  )
78
 
79
 
 
 
80
  def greedy_decide(obs: dict) -> int:
81
+ stacks = obs['stack_states']
82
+ current = obs.get('current_container')
83
+ max_height = obs['max_height']
84
+ upcoming = set(obs.get('upcoming_retrievals', []))
85
 
86
  if current is None:
87
  return 0
88
 
89
+ cur_priority = current['priority']
90
+ best_stack, best_score = -1, float('-inf')
91
 
92
  for i, stack in enumerate(stacks):
93
  depth = len(stack)
 
98
  score += accessibility * (4 - cur_priority)
99
 
100
  if depth > 0:
101
+ top_p = stack[-1]['priority']
102
  if cur_priority > top_p:
103
  score -= 10.0 * (cur_priority - top_p)
104
  elif cur_priority < top_p:
105
  score += 3.0
106
 
107
+ if current['id'] in upcoming:
108
  score += 5.0 * accessibility
109
 
110
  if depth > 0:
 
122
 
123
 
124
  def llm_decide(obs: dict, client: OpenAI) -> int:
125
+ stacks = obs['stack_states']
126
+ current = obs.get('current_container')
127
+ n_stacks = obs['n_stacks']
128
+ max_height = obs['max_height']
129
+ upcoming = obs.get('upcoming_retrievals', [])
130
+ difficulty = obs.get('difficulty', 'medium')
131
 
132
  lines = []
133
  for i, stack in enumerate(stacks):
134
  if not stack:
135
+ lines.append(f' Stack {i}: EMPTY (0/{max_height})')
136
  else:
137
+ contents = ', '.join(f"{c['id']}(p{c['priority']})" for c in stack)
138
  lines.append(
139
+ f' Stack {i}: [{contents}] depth={len(stack)}/{max_height},'
140
  f" top=priority-{stack[-1]['priority']}"
141
  )
142
 
143
  prompt = (
144
+ 'You are a container yard planner. Minimize rehandle operations.\n'
145
+ 'Priority 1=URGENT (retrieved first), 2=Normal, 3=Low.\n'
146
+ 'RULE: containers above the target at retrieval = rehandles (costly).\n\n'
147
+ f'DIFFICULTY: {difficulty}\n'
148
  f"UPCOMING RETRIEVALS: {upcoming or 'Unknown (hard mode)'}\n\n"
149
  f"CONTAINER TO PLACE: id={current['id']}, priority={current['priority']}, "
150
  f"weight={current['weight']}kg\n\n"
151
+ + 'STACKS (bottom->top):\n'
152
+ + '\n'.join(lines)
153
+ + '\n\n'
154
+ + f'Reply ONLY with valid JSON: {{"stack_index": <int 0-{n_stacks - 1}>}}'
155
  )
156
 
157
  try:
 
159
  model=MODEL_NAME,
160
  max_tokens=64,
161
  temperature=0.0,
162
+ messages=[{'role': 'user', 'content': prompt}],
163
  )
164
+ text = (resp.choices[0].message.content or '').strip()
165
+ if '```' in text:
166
+ text = text.split('```')[1]
167
+ if text.startswith('json'):
168
  text = text[4:]
169
  decision = json.loads(text.strip())
170
+ idx = int(decision['stack_index'])
171
+ if 0 <= idx < n_stacks and len(obs['stack_states'][idx]) < max_height:
172
  return idx
173
+ except Exception as exc:
174
+ print(f'[DEBUG] LLM fallback: {exc}', file=sys.stderr, flush=True)
175
 
176
  return greedy_decide(obs)
177
 
178
 
179
+ async def run_episode(url: str, difficulty: str = 'medium', use_llm: bool = False) -> float:
 
 
 
 
 
 
180
  import websockets
181
 
182
+ ws_url = url.replace('http://', 'ws://').replace('https://', 'wss://')
183
+ if not ws_url.endswith('/ws'):
184
+ ws_url = ws_url.rstrip('/') + '/ws'
185
 
186
  client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY) if use_llm else None
187
+ model_label = MODEL_NAME if use_llm else 'greedy'
188
 
189
+ log_start(task=f'{TASK_NAME}-{difficulty}', env=BENCHMARK, model=model_label)
190
 
191
  rewards: List[float] = []
192
  steps_taken = 0
 
195
 
196
  try:
197
  async with websockets.connect(ws_url) as ws:
198
+ await ws.send(json.dumps({'type': 'reset', 'data': {'difficulty': difficulty}}))
199
  resp = json.loads(await ws.recv())
200
+ payload = resp.get('data', {})
201
+ obs = payload.get('observation', payload)
202
 
203
  for step in range(1, MAX_STEPS + 1):
204
+ if obs.get('done', False):
205
  break
206
 
207
  action_idx = llm_decide(obs, client) if use_llm else greedy_decide(obs)
208
 
209
+ await ws.send(json.dumps({'type': 'step', 'data': {'stack_index': action_idx}}))
 
 
 
210
  resp = json.loads(await ws.recv())
211
+ payload = resp.get('data', {})
212
+ obs = payload.get('observation', payload)
213
+ reward = float(payload.get('reward', obs.get('last_reward', 0.0)) or obs.get('last_reward', 0.0))
214
+ done = payload.get('done', obs.get('done', False))
215
+ error = payload.get('error', None)
216
 
217
  rewards.append(reward)
218
  steps_taken = step
 
219
  log_step(step=step, action=str(action_idx), reward=reward, done=done, error=error)
220
 
221
  if done:
222
  break
223
 
224
+ await ws.send(json.dumps({'type': 'state'}))
 
225
  state_resp = json.loads(await ws.recv())
226
+ state = state_resp.get('data', {})
227
+ score = float(state.get('score', obs.get('score', 0.0)))
228
  score = min(max(score, 0.0), 1.0)
229
 
230
  success = score >= SUCCESS_SCORE_THRESHOLD
231
 
232
  except Exception as exc:
233
+ print(f'[DEBUG] Episode error: {exc}', file=sys.stderr, flush=True)
234
 
235
  finally:
236
  log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
 
239
 
240
 
241
  async def run_all(url: str, use_llm: bool = False) -> None:
242
+ for diff in ['easy', 'medium', 'hard']:
243
  await run_episode(url, difficulty=diff, use_llm=use_llm)
244
 
245
 
246
+ if __name__ == '__main__':
247
+ parser = argparse.ArgumentParser(description='Container Port Baseline Agent')
248
+ parser.add_argument('--url', default=ENV_URL)
249
+ parser.add_argument('--difficulty', default='all', choices=['easy', 'medium', 'hard', 'all'])
250
+ parser.add_argument('--use-llm', action='store_true', help='Use LLM agent via HF router (requires HF_TOKEN)')
 
 
 
 
 
 
251
  args = parser.parse_args()
252
 
253
+ if args.difficulty == 'all':
254
  asyncio.run(run_all(args.url, use_llm=args.use_llm))
255
  else:
256
  asyncio.run(run_episode(args.url, difficulty=args.difficulty, use_llm=args.use_llm))
server/environment.py CHANGED
@@ -192,7 +192,7 @@ class ContainerYardEnvironment(Environment):
192
  self.rehandle_count += rehandles
193
  stack.pop(i)
194
  return round(rehandles * 0.4, 4)
195
- return 0.0 # container not yet in yard no penalty
196
 
197
  def _get_upcoming_retrievals(self) -> list[str]:
198
  start = self.retrieval_pointer
 
192
  self.rehandle_count += rehandles
193
  stack.pop(i)
194
  return round(rehandles * 0.4, 4)
195
+ return 0.0 # container not yet in yard - no penalty
196
 
197
  def _get_upcoming_retrievals(self) -> list[str]:
198
  start = self.retrieval_pointer
validate-submission.sh ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+
4
+ PING_URL="${1:-}"
5
+ REPO_DIR="${2:-.}"
6
+
7
+ if [[ -z "${PING_URL}" ]]; then
8
+ echo "Usage: ./validate-submission.sh <ping_url> [repo_dir]"
9
+ exit 1
10
+ fi
11
+
12
+ cd "${REPO_DIR}"
13
+
14
+ echo "== Health =="
15
+ curl -fsS "${PING_URL%/}/health"
16
+ echo
17
+
18
+ echo "== Reset =="
19
+ curl -fsS -X POST "${PING_URL%/}/reset" \
20
+ -H "Content-Type: application/json" \
21
+ -d '{"difficulty":"easy"}'
22
+ echo
23
+
24
+ echo "== OpenEnv Validate =="
25
+ openenv validate --url "${PING_URL%/}"
26
+
27
+ echo "== Docker Build =="
28
+ docker build -t container-port-env .
29
+
30
+ echo "== Inference =="
31
+ python inference.py --url "${PING_URL%/}" --difficulty all
32
+
33
+ echo "Validation complete."