Shabista Sehar committed on
Commit 11f9523 · 0 Parent(s):

Initial: Container Port OpenEnv
Dockerfile ADDED
@@ -0,0 +1,14 @@
+ FROM python:3.11-slim
+
+ WORKDIR /app
+
+ COPY requirements.txt .
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ COPY . .
+
+ ENV PYTHONPATH=/app
+
+ EXPOSE 7860
+
+ CMD ["uvicorn", "server.server:app", "--host", "0.0.0.0", "--port", "7860"]
README.md ADDED
@@ -0,0 +1,71 @@
+ # Container Port Environment
+
+ An OpenEnv-compatible RL environment for container yard management at a shipping terminal.
+
+ ## Task
+
+ A ship arrives with N containers (priority 1=urgent, 2=normal, 3=low). The agent places each
+ container into a stack. At regular intervals, specific containers are retrieved. If a target is
+ buried under others, each container above it is a **rehandle** — expensive in real port operations.
+
+ **Goal: minimize total rehandle operations across the episode.**
+
+ ## Difficulty Levels
+
+ | Parameter          | Easy     | Medium   | Hard     |
+ |--------------------|----------|----------|----------|
+ | Stacks             | 6        | 8        | 10       |
+ | Max stack height   | 4        | 5        | 6        |
+ | Containers         | 20       | 35       | 50       |
+ | Retrieval interval | every 5  | every 5  | every 4  |
+ | Lookahead shown    | 5        | 3        | 0        |
+
+ ## Reward
+
+ | Event | Reward |
+ |---|---|
+ | Accessible placement of priority-1 (near top) | up to +0.45 |
+ | General placement | +0.03 to +0.30 |
+ | Burying high-priority under low-priority | -0.10 to -0.20 |
+ | Invalid action (full stack / bad index) | -2.0 |
+ | Each rehandle at retrieval time | -0.40 |
+
+ ## Score
+
+ `score = 1.0 - (actual_rehandles / worst_case_rehandles)`, clipped to [0.0, 1.0].
+
+ ## Setup
+ ```bash
+ pip install -r requirements.txt
+ uvicorn server.server:app --host 0.0.0.0 --port 7860
+ ```
+
+ ## Run inference
+ ```bash
+ # Greedy agent, all difficulties
+ python inference.py --difficulty all
+
+ # LLM agent (requires HF token in env)
+ export HF_TOKEN=hf_your_token_here
+ python inference.py --use-llm --difficulty all
+
+ # Against deployed HF Space
+ python inference.py --url https://YOUR_USERNAME-container-port-env.hf.space --difficulty all
+ ```
+
+ ## Docker
+ ```bash
+ docker build -t container-port-env .
+ docker run -p 7860:7860 container-port-env
+ ```
+
+ ## API
+
+ - `GET /ping` — health check
+ - `GET /health` — server stats
+ - `WS /ws` — WebSocket interface
+
+ WebSocket messages:
+ - `{"type": "reset", "difficulty": "easy"}` — start episode
+ - `{"type": "step", "action": {"stack_index": 2}}` — place container
+ - `{"type": "state"}` — get full state with score
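The score formula above can be sanity-checked offline with a small sketch (the numbers are hypothetical; `worst_case_rehandles` assumes each retrieved container is buried under a full stack, i.e. `max_height - 1` rehandles per retrieval, matching `ContainerYardEnv.score()`):

```python
def score(rehandles: int, n_retrieved: int, max_height: int) -> float:
    # Worst case: every retrieved container sits at the bottom of a full stack,
    # costing (max_height - 1) rehandles per retrieval.
    worst_case = n_retrieved * (max_height - 1)
    if worst_case == 0:
        return 1.0  # nothing retrieved yet -> perfect by convention
    return round(max(0.0, 1.0 - rehandles / worst_case), 4)

print(score(6, 10, 5))   # 10 retrievals, 6 rehandles, height-5 yard -> 0.85
print(score(0, 10, 5))   # perfect play -> 1.0
```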
client/__init__.py ADDED
@@ -0,0 +1 @@
+
client/container_env.py ADDED
@@ -0,0 +1,37 @@
+ import json
+ import websockets
+ from typing import Any, Dict, Tuple
+
+ class ContainerEnvClient:
+     """Async client for Container Port OpenEnv."""
+
+     def __init__(self, base_url: str = "http://localhost:7860"):
+         ws_url = base_url.replace("http://", "ws://").replace("https://", "wss://")
+         self.ws_url = ws_url.rstrip("/") + "/ws"
+         self._ws = None
+
+     async def __aenter__(self):
+         self._ws = await websockets.connect(self.ws_url)
+         return self
+
+     async def __aexit__(self, *args):
+         if self._ws:
+             await self._ws.close()
+
+     async def reset(self, difficulty: str = "medium") -> Dict[str, Any]:
+         await self._ws.send(json.dumps({"type": "reset", "difficulty": difficulty}))
+         resp = json.loads(await self._ws.recv())
+         return resp["observation"]
+
+     async def step(self, stack_index: int) -> Tuple[Dict, float, bool, Dict]:
+         await self._ws.send(json.dumps({
+             "type": "step",
+             "action": {"stack_index": stack_index}
+         }))
+         resp = json.loads(await self._ws.recv())
+         return resp["observation"], resp["reward"], resp["done"], resp.get("info", {})
+
+     async def state(self) -> Dict[str, Any]:
+         await self._ws.send(json.dumps({"type": "state"}))
+         resp = json.loads(await self._ws.recv())
+         return resp["state"]
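The http→ws URL rewrite in `__init__` above can be exercised in isolation (a standalone copy of that logic, not an import of the client):

```python
# Standalone copy of the URL rewrite from ContainerEnvClient.__init__.
def to_ws_url(base_url: str) -> str:
    ws = base_url.replace("http://", "ws://").replace("https://", "wss://")
    return ws.rstrip("/") + "/ws"   # normalize trailing slash, append endpoint

print(to_ws_url("http://localhost:7860"))                      # ws://localhost:7860/ws
print(to_ws_url("https://user-container-port-env.hf.space/"))  # wss://user-container-port-env.hf.space/ws
```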
inference.py ADDED
@@ -0,0 +1,224 @@
+ #!/usr/bin/env python3
+ """
+ Container Port OpenEnv — Baseline Inference Script
+ SST x Meta PyTorch OpenEnv Hackathon
+
+ Required environment variables (or set below):
+     HF_TOKEN     - Your Hugging Face token
+     API_BASE_URL - LLM API endpoint (default: https://router.huggingface.co/v1)
+     MODEL_NAME   - Model identifier (default: meta-llama/Llama-3.1-8B-Instruct)
+
+ Usage:
+     python inference.py
+     python inference.py --url https://YOUR_USERNAME-container-port-env.hf.space --difficulty all
+     python inference.py --difficulty easy
+ """
+
+ import os
+ import sys
+ import json
+ import asyncio
+ import argparse
+ import websockets
+ from openai import OpenAI
+
+ # Required configuration variables
+ API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
+ MODEL_NAME = os.getenv("MODEL_NAME", "meta-llama/Llama-3.1-8B-Instruct")
+ HF_TOKEN = os.getenv("HF_TOKEN", "")  # set your HF token here or via env var
+
+ ENV_URL = os.getenv("ENV_URL", "http://localhost:7860")
+
+
+ def _llm_client() -> OpenAI:
+     """Return an OpenAI-compatible client pointed at HF Inference Router."""
+     return OpenAI(base_url=API_BASE_URL, api_key=HF_TOKEN)
+
+
+ def greedy_decide(obs: dict) -> int:
+     """
+     Greedy heuristic agent — no LLM call.
+     Scores each valid stack by accessibility and priority compatibility.
+     """
+     stacks = obs["stack_states"]
+     current = obs.get("current_container")
+     max_height = obs["max_height"]
+     upcoming = set(obs.get("upcoming_retrievals", []))
+
+     if current is None:
+         return 0
+
+     cur_priority = current["priority"]
+     best_stack, best_score = -1, float("-inf")
+
+     for i, stack in enumerate(stacks):
+         depth = len(stack)
+         if depth >= max_height:
+             continue
+
+         score = 0.0
+         accessibility = (max_height - depth) / max_height
+         score += accessibility * (4 - cur_priority)
+
+         if depth > 0:
+             top_priority = stack[-1]["priority"]
+             if cur_priority > top_priority:
+                 score -= 10.0 * (cur_priority - top_priority)
+             elif cur_priority < top_priority:
+                 score += 3.0
+
+         if current["id"] in upcoming:
+             score += 5.0 * accessibility
+
+         if depth > 0:
+             score += 0.5
+
+         if score > best_score:
+             best_score = score
+             best_stack = i
+
+     if best_stack == -1:
+         for i, stack in enumerate(stacks):
+             if len(stack) < max_height:
+                 return i
+     return best_stack
+
+
+ def llm_decide(obs: dict) -> int:
+     """Use HF-hosted LLM via OpenAI-compatible client to choose a stack."""
+     stacks = obs["stack_states"]
+     current = obs.get("current_container")
+     n_stacks = obs["n_stacks"]
+     max_height = obs["max_height"]
+     upcoming = obs.get("upcoming_retrievals", [])
+     difficulty = obs.get("difficulty", "medium")
+
+     stack_lines = []
+     for i, stack in enumerate(stacks):
+         if not stack:
+             stack_lines.append(f"  Stack {i}: EMPTY (0/{max_height})")
+         else:
+             contents = ", ".join(f"{c['id']}(p{c['priority']})" for c in stack)
+             stack_lines.append(
+                 f"  Stack {i}: [{contents}] depth={len(stack)}/{max_height},"
+                 f" top=priority-{stack[-1]['priority']}"
+             )
+
+     prompt = (
+         f"You are an expert container yard planner.\n"
+         f"TASK: Place the incoming container into a stack to MINIMIZE future rehandle operations.\n"
+         f"RULE: When a container is retrieved, every container ON TOP of it must be moved (rehandle).\n"
+         f"Priority 1=URGENT (retrieved first), 2=Normal, 3=Low (retrieved last).\n\n"
+         f"DIFFICULTY: {difficulty}\n"
+         f"UPCOMING RETRIEVALS (next to be retrieved, in order): "
+         f"{upcoming if upcoming else 'Unknown (hard mode)'}\n\n"
+         f"CONTAINER TO PLACE: id={current['id']}, priority={current['priority']}, "
+         f"weight={current['weight']}kg\n\n"
+         f"STACK STATES (bottom → top):\n" + "\n".join(stack_lines) + "\n\n"
+         f"Respond with ONLY valid JSON: {{\"stack_index\": <integer 0-{n_stacks-1}>}}"
+     )
+
+     try:
+         client = _llm_client()
+         response = client.chat.completions.create(
+             model=MODEL_NAME,
+             max_tokens=64,
+             temperature=0.0,
+             messages=[{"role": "user", "content": prompt}],
+         )
+         text = response.choices[0].message.content.strip()
+         # strip markdown fences if model wraps in ```json ... ```
+         if "```" in text:
+             text = text.split("```")[1]
+             if text.startswith("json"):
+                 text = text[4:]
+         decision = json.loads(text.strip())
+         idx = int(decision["stack_index"])
+         if 0 <= idx < n_stacks and len(obs["stack_states"][idx]) < max_height:
+             return idx
+     except Exception as e:
+         print(f"  [LLM fallback: {e}]", file=sys.stderr)
+
+     return greedy_decide(obs)
+
+
+ async def run_episode(url: str, difficulty: str = "medium", use_llm: bool = False) -> float:
+     ws_url = url.replace("http://", "ws://").replace("https://", "wss://")
+     if not ws_url.endswith("/ws"):
+         ws_url = ws_url.rstrip("/") + "/ws"
+
+     # [START] log
+     print(json.dumps({"type": "[START]", "task": difficulty, "difficulty": difficulty,
+                       "env_url": url, "model": MODEL_NAME if use_llm else "greedy"}))
+     sys.stdout.flush()
+
+     total_reward = 0.0
+     step = 0
+
+     async with websockets.connect(ws_url) as ws:
+         await ws.send(json.dumps({"type": "reset", "difficulty": difficulty}))
+         resp = json.loads(await ws.recv())
+         obs = resp["observation"]
+
+         while not obs.get("done", False):
+             action_idx = llm_decide(obs) if use_llm else greedy_decide(obs)
+
+             await ws.send(json.dumps({"type": "step", "action": {"stack_index": action_idx}}))
+             resp = json.loads(await ws.recv())
+             obs = resp["observation"]
+             reward = resp["reward"]
+             done = resp["done"]
+             total_reward += reward
+             step += 1
+
+             # [STEP] log
+             print(json.dumps({
+                 "type": "[STEP]",
+                 "step": step,
+                 "action": action_idx,
+                 "reward": round(reward, 4),
+                 "total_reward": round(total_reward, 4),
+                 "done": done,
+                 "rehandle_count": obs["rehandle_count"],
+             }))
+             sys.stdout.flush()
+
+         # fetch final state for score
+         await ws.send(json.dumps({"type": "state"}))
+         state_resp = json.loads(await ws.recv())
+         state = state_resp["state"]
+
+     final_score = state.get("score", 0.0)
+
+     # [END] log
+     print(json.dumps({
+         "type": "[END]",
+         "task": difficulty,
+         "difficulty": difficulty,
+         "total_reward": round(total_reward, 4),
+         "final_score": final_score,
+         "total_steps": step,
+         "rehandle_count": state.get("rehandle_count", 0),
+     }))
+     sys.stdout.flush()
+
+     return final_score
+
+
+ async def run_all(url: str, use_llm: bool = False):
+     for diff in ["easy", "medium", "hard"]:
+         await run_episode(url, difficulty=diff, use_llm=use_llm)
+
+
+ if __name__ == "__main__":
+     parser = argparse.ArgumentParser(description="Container Port Baseline Agent")
+     parser.add_argument("--url", default=ENV_URL)
+     parser.add_argument("--difficulty", default="all", choices=["easy", "medium", "hard", "all"])
+     parser.add_argument("--use-llm", action="store_true")
+     args = parser.parse_args()
+
+     if args.difficulty == "all":
+         asyncio.run(run_all(args.url, use_llm=args.use_llm))
+     else:
+         asyncio.run(run_episode(args.url, difficulty=args.difficulty, use_llm=args.use_llm))
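The greedy heuristic can be exercised offline on a hand-built observation. Below is a condensed copy of `greedy_decide` (same scoring terms, reordered for brevity) run against toy data — no server or LLM involved:

```python
# Condensed copy of the greedy heuristic, applied to a toy observation.
def greedy_decide(obs: dict) -> int:
    stacks, cur = obs["stack_states"], obs["current_container"]
    max_height = obs["max_height"]
    upcoming = set(obs.get("upcoming_retrievals", []))
    best_stack, best_score = -1, float("-inf")
    for i, stack in enumerate(stacks):
        depth = len(stack)
        if depth >= max_height:
            continue  # full stack: invalid placement
        accessibility = (max_height - depth) / max_height
        score = accessibility * (4 - cur["priority"])
        if depth > 0:
            top = stack[-1]["priority"]
            if cur["priority"] > top:
                score -= 10.0 * (cur["priority"] - top)  # would bury higher priority
            elif cur["priority"] < top:
                score += 3.0
            score += 0.5  # mild preference for consolidating non-empty stacks
        if cur["id"] in upcoming:
            score += 5.0 * accessibility
        if score > best_score:
            best_score, best_stack = score, i
    return best_stack

obs = {
    "stack_states": [
        [],                                                  # stack 0: empty
        [{"id": "C001", "priority": 1}],                     # stack 1: urgent on top
        [{"id": f"C{n}", "priority": 3} for n in range(4)],  # stack 2: full
    ],
    "current_container": {"id": "C099", "priority": 3, "weight": 12.0},
    "max_height": 4,
    "upcoming_retrievals": [],
}
print(greedy_decide(obs))  # 0 — empty stack beats burying the priority-1
```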
openenv.yaml ADDED
@@ -0,0 +1,30 @@
+ name: container-port-env
+ version: "0.1.0"
+ description: >
+   Container terminal yard RL environment. An agent places incoming ship
+   containers into stacks of limited height to minimize costly rehandle
+   operations during retrieval. Features 3 difficulty levels (easy/medium/hard)
+   with different stack configurations, retrieval frequencies, and lookahead
+   visibility. Models real port logistics decision-making.
+ tags:
+   - logistics
+   - planning
+   - real-world
+   - combinatorial-optimization
+ sdk: docker
+ entry_point: server.server:app
+ tools:
+   - name: place_container
+     description: >
+       Place the current incoming container into a specified stack index.
+       Priority 1=urgent (retrieved first), 2=normal, 3=low (retrieved last).
+       Burying high-priority under low-priority causes rehandle costs.
+     input_schema:
+       type: object
+       properties:
+         stack_index:
+           type: integer
+           description: "Zero-indexed stack to place the container into"
+           minimum: 0
+       required:
+         - stack_index
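A minimal stdlib-only validator mirroring the `input_schema` above (a hypothetical helper for illustration — on the server side, validation is handled by the pydantic `ContainerAction` model):

```python
# Mirror of the tool's input_schema: required integer stack_index, minimum 0.
# The upper bound is the yard's stack count, enforced at runtime by the env.
def validate_action(payload: dict, n_stacks: int) -> bool:
    idx = payload.get("stack_index")
    # bool is a subclass of int in Python, so exclude it explicitly
    return isinstance(idx, int) and not isinstance(idx, bool) and 0 <= idx < n_stacks

print(validate_action({"stack_index": 2}, 8))    # True
print(validate_action({"stack_index": -1}, 8))   # False: below minimum
print(validate_action({}, 8))                    # False: missing required key
```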
pyproject.toml ADDED
@@ -0,0 +1,17 @@
+ [build-system]
+ requires = ["setuptools>=68"]
+ build-backend = "setuptools.build_meta"
+
+ [project]
+ name = "openenv-container-port"
+ version = "0.1.0"
+ description = "Container yard RL environment for OpenEnv hackathon"
+ requires-python = ">=3.10"
+ dependencies = [
+     "fastapi>=0.110.0",
+     "uvicorn[standard]>=0.29.0",
+     "websockets>=12.0",
+     "pydantic>=2.0.0",
+     "openenv-core>=0.1.0",
+     "openai>=1.0.0",
+ ]
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ fastapi>=0.110.0
+ uvicorn[standard]>=0.29.0
+ websockets>=12.0
+ pydantic>=2.0.0
+ openenv-core>=0.1.0
+ openai>=1.0.0
+ pytest>=8.0.0
+ huggingface_hub>=0.20.0
server/__init__.py ADDED
@@ -0,0 +1 @@
+
server/__pycache__/__init__.cpython-313.pyc.1828594663216 ADDED
Binary file (145 Bytes)
server/__pycache__/__init__.cpython-313.pyc.2347090562224 ADDED
Binary file (145 Bytes)
server/__pycache__/environment.cpython-313.pyc.1828594650032 ADDED
Binary file (11.4 kB)
server/__pycache__/environment.cpython-313.pyc.2347106154928 ADDED
Binary file (11.4 kB)
server/__pycache__/models.cpython-313.pyc.2347106454640 ADDED
Binary file (2.34 kB)
server/__pycache__/server.cpython-313.pyc.2347090514912 ADDED
Binary file (3.88 kB)
server/environment.py ADDED
@@ -0,0 +1,211 @@
+ import random
+ from dataclasses import dataclass
+ from typing import List, Optional, Dict, Any, Tuple
+
+ @dataclass
+ class Container:
+     id: str
+     priority: int  # 1=urgent, 2=normal, 3=low
+     weight: float
+
+ DIFFICULTY_CONFIG = {
+     "easy": {
+         "n_stacks": 6,
+         "max_height": 4,
+         "n_containers": 20,
+         "retrieval_interval": 5,
+         "lookahead": 5,
+         "priority_weights": [0.4, 0.4, 0.2],
+     },
+     "medium": {
+         "n_stacks": 8,
+         "max_height": 5,
+         "n_containers": 35,
+         "retrieval_interval": 5,
+         "lookahead": 3,
+         "priority_weights": [0.33, 0.34, 0.33],
+     },
+     "hard": {
+         "n_stacks": 10,
+         "max_height": 6,
+         "n_containers": 50,
+         "retrieval_interval": 4,
+         "lookahead": 0,
+         "priority_weights": [0.25, 0.35, 0.40],
+     },
+ }
+
+ class ContainerYardEnv:
+     def __init__(self, difficulty: str = "medium", seed: Optional[int] = None):
+         assert difficulty in DIFFICULTY_CONFIG, f"difficulty must be one of {list(DIFFICULTY_CONFIG.keys())}"
+         self.difficulty = difficulty
+         self.seed = seed
+         cfg = DIFFICULTY_CONFIG[difficulty]
+         self.n_stacks = cfg["n_stacks"]
+         self.max_height = cfg["max_height"]
+         self.n_containers = cfg["n_containers"]
+         self.retrieval_interval = cfg["retrieval_interval"]
+         self.lookahead = cfg["lookahead"]
+         self.priority_weights = cfg["priority_weights"]
+         self.reset()
+
+     def reset(self) -> Dict[str, Any]:
+         if self.seed is not None:
+             random.seed(self.seed)
+         self.stacks: List[List[Container]] = [[] for _ in range(self.n_stacks)]
+         self.rehandle_count = 0
+         self.step_count = 0
+         self.total_reward = 0.0
+         self.done = False
+         self.manifest: List[Container] = self._generate_manifest()
+         self.retrieval_queue: List[str] = self._generate_retrieval_queue()
+         self.retrieval_pointer = 0
+         self.current_idx = 0
+         return self._observe(last_reward=0.0)
+
+     def _generate_manifest(self) -> List[Container]:
+         containers = []
+         for i in range(self.n_containers):
+             priority = random.choices([1, 2, 3], weights=self.priority_weights)[0]
+             containers.append(Container(
+                 id=f"C{i:03d}",
+                 priority=priority,
+                 weight=round(random.uniform(5.0, 30.0), 1)
+             ))
+         return containers
+
+     def _generate_retrieval_queue(self) -> List[str]:
+         ids_by_priority = {1: [], 2: [], 3: []}
+         for c in self.manifest:
+             ids_by_priority[c.priority].append(c.id)
+         for p in ids_by_priority:
+             random.shuffle(ids_by_priority[p])
+         queue = ids_by_priority[1] + ids_by_priority[2] + ids_by_priority[3]
+         return queue
+
+     def step(self, stack_index: int) -> Tuple[Dict[str, Any], float, bool, Dict[str, Any]]:
+         if self.done:
+             return self._observe(0.0), 0.0, True, {"error": "episode already done"}
+
+         if stack_index < 0 or stack_index >= self.n_stacks:
+             reward = -2.0
+             self.total_reward += reward
+             return self._observe(reward), reward, False, {"error": f"invalid stack_index {stack_index}, must be 0-{self.n_stacks-1}"}
+
+         if len(self.stacks[stack_index]) >= self.max_height:
+             reward = -2.0
+             self.total_reward += reward
+             return self._observe(reward), reward, False, {"error": f"stack {stack_index} is full (height {self.max_height})"}
+
+         current = self.manifest[self.current_idx]
+         self.stacks[stack_index].append(current)
+         placement_reward = self._placement_reward(stack_index, current)
+
+         self.current_idx += 1
+         self.step_count += 1
+
+         retrieval_cost = 0.0
+         retrievals_done = []
+         if self.step_count % self.retrieval_interval == 0:
+             cost, done_ids = self._trigger_retrieval()
+             retrieval_cost = cost
+             retrievals_done = done_ids
+
+         reward = placement_reward - retrieval_cost
+         self.total_reward += reward
+         self.done = (self.current_idx >= len(self.manifest))
+
+         return self._observe(reward), reward, self.done, {
+             "rehandles": self.rehandle_count,
+             "step": self.step_count,
+             "placement_reward": round(placement_reward, 4),
+             "retrieval_cost": round(retrieval_cost, 4),
+             "retrievals_done": retrievals_done,
+         }
+
+     def _placement_reward(self, stack_index: int, container: Container) -> float:
+         # stack_depth = zero-based index of the just-placed container
+         stack_depth = len(self.stacks[stack_index]) - 1
+         accessibility = (self.max_height - stack_depth) / self.max_height
+         priority_weight = (4 - container.priority) / 3.0  # priority 1→1.0, 2→0.67, 3→0.33
+
+         base = 0.3 * accessibility * priority_weight
+
+         # Bonus: high-priority container placed near top (accessible for fast retrieval)
+         if container.priority == 1 and stack_depth <= 1:
+             base += 0.15
+
+         # Penalty: placing lower-priority on top of higher-priority container (causes future rehandles)
+         if stack_depth > 0:
+             below = self.stacks[stack_index][-2]  # container directly below the one just placed
+             if container.priority > below.priority:
+                 base -= 0.2 * (container.priority - below.priority) / 2.0
+
+         return round(base, 4)
+
+     def _trigger_retrieval(self) -> Tuple[float, List[str]]:
+         total_cost = 0.0
+         done_ids = []
+         for _ in range(2):
+             if self.retrieval_pointer >= len(self.retrieval_queue):
+                 break
+             target_id = self.retrieval_queue[self.retrieval_pointer]
+             self.retrieval_pointer += 1
+             cost = self._retrieve(target_id)
+             total_cost += cost
+             done_ids.append(target_id)
+         return total_cost, done_ids
+
+     def _retrieve(self, target_id: str) -> float:
+         for stack in self.stacks:
+             for i, c in enumerate(stack):
+                 if c.id == target_id:
+                     rehandles = len(stack) - 1 - i  # containers above target
+                     self.rehandle_count += rehandles
+                     stack.pop(i)
+                     return round(rehandles * 0.4, 4)
+         return 0.0  # container not yet in yard — no penalty
+
+     def _get_upcoming_retrievals(self) -> List[str]:
+         start = self.retrieval_pointer
+         end = min(start + self.lookahead, len(self.retrieval_queue))
+         return self.retrieval_queue[start:end]
+
+     def _observe(self, last_reward: float = 0.0) -> Dict[str, Any]:
+         stack_states = []
+         for s in self.stacks:
+             stack_states.append([{"id": c.id, "priority": c.priority} for c in s])
+
+         current = None
+         if self.current_idx < len(self.manifest):
+             c = self.manifest[self.current_idx]
+             current = {"id": c.id, "priority": c.priority, "weight": c.weight}
+
+         return {
+             "stack_states": stack_states,
+             "current_container": current,
+             "upcoming_retrievals": self._get_upcoming_retrievals(),
+             "rehandle_count": self.rehandle_count,
+             "step": self.step_count,
+             "containers_remaining": len(self.manifest) - self.current_idx,
+             "n_stacks": self.n_stacks,
+             "max_height": self.max_height,
+             "difficulty": self.difficulty,
+             "last_reward": last_reward,
+             "done": self.done,
+         }
+
+     def get_state(self) -> Dict[str, Any]:
+         obs = self._observe()
+         obs["score"] = self.score()
+         obs["total_reward"] = round(self.total_reward, 4)
+         return obs
+
+     def score(self) -> float:
+         """Normalized score in [0.0, 1.0]. Based on actual retrievals attempted."""
+         n_retrieved = self.retrieval_pointer  # only count retrievals that actually happened
+         worst_case = n_retrieved * (self.max_height - 1)
+         if worst_case == 0:
+             return 1.0
+         score = max(0.0, 1.0 - self.rehandle_count / worst_case)
+         return round(min(score, 1.0), 4)
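How rehandles are counted at retrieval time can be seen with a standalone sketch mirroring `_retrieve()` above (stack lists run bottom → top; the container IDs are hypothetical):

```python
# Count containers sitting above a target in one stack, as _retrieve() does.
def rehandles_for(stack: list, target_id: str) -> int:
    for i, cid in enumerate(stack):
        if cid == target_id:
            return len(stack) - 1 - i  # everything above the target must be moved
    return 0  # target not in this stack

stack = ["C007", "C002", "C011"]      # C007 at the bottom, C011 on top
print(rehandles_for(stack, "C007"))   # 2 — both containers above it
print(rehandles_for(stack, "C011"))   # 0 — already on top
```

At -0.40 per rehandle, retrieving C007 here would cost 0.8 reward, which is why the placement heuristics try to keep soon-to-be-retrieved containers near the top.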
server/models.py ADDED
@@ -0,0 +1,36 @@
+ from pydantic import BaseModel, Field
+ from typing import List, Optional, Dict, Any
+
+ class ContainerInfo(BaseModel):
+     id: str
+     priority: int = Field(..., ge=1, le=3)
+     weight: float
+
+ class StackEntry(BaseModel):
+     id: str
+     priority: int
+
+ class ContainerAction(BaseModel):
+     stack_index: int = Field(..., description="Which stack (0-indexed) to place the current container into")
+
+ class ContainerObservation(BaseModel):
+     stack_states: List[List[Dict[str, Any]]]
+     current_container: Optional[Dict[str, Any]]
+     upcoming_retrievals: List[str]
+     rehandle_count: int
+     step: int
+     containers_remaining: int
+     n_stacks: int
+     max_height: int
+     difficulty: str
+     last_reward: float
+     done: bool
+
+ class ContainerState(BaseModel):
+     stack_states: List[List[Dict[str, Any]]]
+     rehandle_count: int
+     step: int
+     score: float
+     difficulty: str
+     done: bool
+     total_reward: float
server/server.py ADDED
@@ -0,0 +1,84 @@
+ import json
+ import uuid
+ from fastapi import FastAPI, WebSocket, WebSocketDisconnect
+ from server.environment import ContainerYardEnv
+ from server.models import ContainerAction
+
+ app = FastAPI(title="Container Port OpenEnv", version="0.1.0")
+
+ sessions: dict = {}
+
+ @app.get("/ping")
+ def ping():
+     return {"status": "ok", "env": "container-port-env"}
+
+ @app.get("/health")
+ def health():
+     return {
+         "status": "healthy",
+         "active_sessions": len(sessions),
+         "difficulties": ["easy", "medium", "hard"],
+     }
+
+ @app.websocket("/ws")
+ async def websocket_endpoint(websocket: WebSocket):
+     await websocket.accept()
+     session_id = str(uuid.uuid4())
+     sessions[session_id] = ContainerYardEnv(difficulty="medium")
+
+     try:
+         while True:
+             raw = await websocket.receive_text()
+             msg = json.loads(raw)
+             msg_type = msg.get("type")
+             env = sessions[session_id]
+
+             if msg_type == "reset":
+                 difficulty = msg.get("difficulty", "medium")
+                 if difficulty not in ["easy", "medium", "hard"]:
+                     difficulty = "medium"
+                 sessions[session_id] = ContainerYardEnv(difficulty=difficulty)
+                 env = sessions[session_id]
+                 obs = env.reset()
+                 await websocket.send_text(json.dumps({
+                     "type": "reset",
+                     "observation": obs,
+                     "reward": 0.0,
+                     "done": False,
+                     "session_id": session_id,
+                 }))
+
+             elif msg_type == "step":
+                 try:
+                     action = ContainerAction(**msg["action"])
+                     obs, reward, done, info = env.step(action.stack_index)
+                     await websocket.send_text(json.dumps({
+                         "type": "step",
+                         "observation": obs,
+                         "reward": reward,
+                         "done": done,
+                         "info": info,
+                     }))
+                 except Exception as e:
+                     await websocket.send_text(json.dumps({
+                         "type": "error",
+                         "message": str(e),
+                     }))
+
+             elif msg_type == "state":
+                 state = env.get_state()
+                 await websocket.send_text(json.dumps({
+                     "type": "state",
+                     "state": state,
+                 }))
+
+             else:
+                 await websocket.send_text(json.dumps({
+                     "type": "error",
+                     "message": f"Unknown message type: {msg_type}",
+                 }))
+
+     except WebSocketDisconnect:
+         pass
+     finally:
+         sessions.pop(session_id, None)
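The endpoint above dispatches on plain JSON text frames; an offline sketch of the three message shapes it accepts (no server needed — this only exercises the serialization, which is what `send_text`/`receive_text` carry on the wire):

```python
import json

# The three frame types the /ws endpoint dispatches on.
frames = [
    {"type": "reset", "difficulty": "easy"},
    {"type": "step", "action": {"stack_index": 2}},
    {"type": "state"},
]
for frame in frames:
    wire = json.dumps(frame)   # what the client sends as a text frame
    msg = json.loads(wire)     # what websocket.receive_text + json.loads yields
    print(msg["type"])         # reset, step, state
```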
tests/__pycache__/test_env.cpython-313-pytest-9.0.2.pyc.10200 ADDED
Binary file (18.9 kB)
tests/__pycache__/test_env.cpython-313-pytest-9.0.2.pyc.16772 ADDED
Binary file (18.9 kB)
tests/test_env.py ADDED
@@ -0,0 +1,110 @@
+ import pytest
+ from server.environment import ContainerYardEnv, DIFFICULTY_CONFIG
+
+ @pytest.mark.parametrize("difficulty", ["easy", "medium", "hard"])
+ def test_reset_returns_valid_obs(difficulty):
+     env = ContainerYardEnv(difficulty=difficulty, seed=42)
+     obs = env.reset()
+     cfg = DIFFICULTY_CONFIG[difficulty]
+     assert len(obs["stack_states"]) == cfg["n_stacks"]
+     assert obs["current_container"] is not None
+     assert obs["step"] == 0
+     assert obs["rehandle_count"] == 0
+     assert obs["difficulty"] == difficulty
+     assert obs["done"] is False
+
+ @pytest.mark.parametrize("difficulty", ["easy", "medium", "hard"])
+ def test_step_valid_action(difficulty):
+     env = ContainerYardEnv(difficulty=difficulty, seed=42)
+     env.reset()
+     obs, reward, done, info = env.step(0)
+     assert isinstance(reward, float)
+     assert obs["step"] == 1
+     assert len(obs["stack_states"][0]) == 1
+     assert "rehandles" in info
+
+ @pytest.mark.parametrize("difficulty", ["easy", "medium", "hard"])
+ def test_step_invalid_stack_index(difficulty):
+     env = ContainerYardEnv(difficulty=difficulty, seed=42)
+     env.reset()
+     obs, reward, done, info = env.step(999)
+     assert reward == -2.0
+     assert "error" in info
+     assert done is False
+
+ @pytest.mark.parametrize("difficulty", ["easy", "medium", "hard"])
+ def test_full_episode_completes(difficulty):
+     env = ContainerYardEnv(difficulty=difficulty, seed=42)
+     env.reset()
+     done = False
+     steps = 0
+     cfg = DIFFICULTY_CONFIG[difficulty]
+     n_stacks = cfg["n_stacks"]
+     max_height = cfg["max_height"]
+     while not done:
+         stacks = env._observe()["stack_states"]
+         chosen = 0
+         for i in range(n_stacks):
+             if len(stacks[i]) < max_height:
+                 chosen = i
+                 break
+         _, _, done, _ = env.step(chosen)
+         steps += 1
+         assert steps < 1000, "Episode did not complete in time"
+     assert done
+
+ @pytest.mark.parametrize("difficulty", ["easy", "medium", "hard"])
+ def test_score_in_range(difficulty):
+     env = ContainerYardEnv(difficulty=difficulty, seed=42)
+     env.reset()
+     done = False
+     cfg = DIFFICULTY_CONFIG[difficulty]
+     n_stacks = cfg["n_stacks"]
+     max_height = cfg["max_height"]
+     while not done:
+         stacks = env._observe()["stack_states"]
+         chosen = 0
+         for i in range(n_stacks):
+             if len(stacks[i]) < max_height:
+                 chosen = i
+                 break
+         _, _, done, _ = env.step(chosen)
+     score = env.score()
+     assert 0.0 <= score <= 1.0
+
+ def test_lookahead_visibility():
+     easy_env = ContainerYardEnv(difficulty="easy", seed=42)
+     hard_env = ContainerYardEnv(difficulty="hard", seed=42)
+     easy_obs = easy_env.reset()
+     hard_obs = hard_env.reset()
+     assert len(easy_obs["upcoming_retrievals"]) > len(hard_obs["upcoming_retrievals"])
+     assert len(hard_obs["upcoming_retrievals"]) == 0
+
+ def test_reward_is_dense():
+     env = ContainerYardEnv(difficulty="medium", seed=42)
+     env.reset()
+     rewards = []
+     done = False
+     step = 0
+     while not done and step < 20:
+         stacks = env._observe()["stack_states"]
+         chosen = step % 8        # medium config: 8 stacks
+         if len(stacks[chosen]) >= 5:  # medium config: max height 5
+             chosen = 0
+         _, r, done, _ = env.step(chosen)
+         rewards.append(r)
+         step += 1
+     nonzero = sum(1 for r in rewards if abs(r) > 1e-6)
+     assert nonzero >= len(rewards) * 0.5, f"Too many zero rewards: {rewards}"
+
+ def test_no_double_retrieval():
+     """Retrieval pointer advances correctly — no container retrieved twice."""
+     env = ContainerYardEnv(difficulty="easy", seed=42)
+     env.reset()
+     for _ in range(env.n_containers):
+         if env.done:
+             break
+         env.step(0 if len(env.stacks[0]) < env.max_height else 1)
+         # retrieval_pointer should be <= queue length
+         assert env.retrieval_pointer <= len(env.retrieval_queue)