Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- Dockerfile +76 -0
- README.md +194 -5
- __init__.py +25 -0
- client.py +63 -0
- graders.py +40 -0
- gym_env.py +118 -0
- inference.py +138 -0
- models.py +73 -0
- openenv.yaml +6 -0
- openenv_crisis_logistics_env.egg-info/PKG-INFO +12 -0
- openenv_crisis_logistics_env.egg-info/SOURCES.txt +19 -0
- openenv_crisis_logistics_env.egg-info/dependency_links.txt +1 -0
- openenv_crisis_logistics_env.egg-info/entry_points.txt +2 -0
- openenv_crisis_logistics_env.egg-info/requires.txt +8 -0
- openenv_crisis_logistics_env.egg-info/top_level.txt +1 -0
- pyproject.toml +47 -0
- server/__init__.py +11 -0
- server/app.py +116 -0
- server/crisis_logistics_env_environment.py +208 -0
- server/requirements.txt +6 -0
- tasks.py +119 -0
- test_engine.py +34 -0
- train_and_evaluate.py +71 -0
- uv.lock +0 -0
Dockerfile
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Multi-stage build using openenv-base
# This Dockerfile is flexible and works for both:
# - In-repo environments (with local OpenEnv sources)
# - Standalone environments (with openenv from PyPI/Git)
# The build script (openenv build) handles context detection and sets appropriate build args.

ARG BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest
FROM ${BASE_IMAGE} AS builder

WORKDIR /app

# Ensure git is available (required for installing dependencies from VCS)
RUN apt-get update && \
    apt-get install -y --no-install-recommends git && \
    rm -rf /var/lib/apt/lists/*

# Build argument to control whether we're building standalone or in-repo
ARG BUILD_MODE=in-repo
ARG ENV_NAME=crisis_logistics_env

# Copy environment code (always at root of build context)
COPY . /app/env

# For in-repo builds, openenv is already vendored in the build context
# For standalone builds, openenv will be installed via pyproject.toml
WORKDIR /app/env

# Ensure uv is available (for local builds where base image lacks it)
# NOTE(review): assumes curl is present in the base image — verify, since the
# runtime HEALTHCHECK below relies on curl as well.
RUN if ! command -v uv >/dev/null 2>&1; then \
        curl -LsSf https://astral.sh/uv/install.sh | sh && \
        mv /root/.local/bin/uv /usr/local/bin/uv && \
        mv /root/.local/bin/uvx /usr/local/bin/uvx; \
    fi

# Create an isolated environment and install dependencies cleanly inside the image.
# The venv is removed first so a stale host .venv copied by `COPY . /app/env`
# never leaks into the image.
RUN rm -rf /app/env/.venv
ENV UV_PROJECT_ENVIRONMENT=/app/env/.venv

# Cache uv downloads across builds; prefer the locked resolution when uv.lock exists.
RUN --mount=type=cache,target=/root/.cache/uv \
    if [ -f uv.lock ]; then \
        uv sync --frozen; \
    else \
        uv sync; \
    fi

# Final runtime stage
FROM ${BASE_IMAGE}

WORKDIR /app

# Copy the virtual environment from builder
# NOTE(review): the venv's console-script shebangs still point at
# /app/env/.venv; this works only because the next COPY also brings
# /app/env (including its .venv) into the final image — confirm if the
# duplicate copy is ever removed.
COPY --from=builder /app/env/.venv /app/.venv

# Copy the environment code
COPY --from=builder /app/env /app/env

# Set PATH to use the virtual environment
ENV PATH="/app/.venv/bin:$PATH"

# Set PYTHONPATH so imports work correctly
ENV PYTHONPATH="/app/env:$PYTHONPATH"

# Health check
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:8000/health || exit 1

# Run the FastAPI server
# The module path is constructed to work with the /app/env structure
ENV ENABLE_WEB_INTERFACE=true
CMD ["sh", "-c", "cd /app/env && uvicorn server.app:app --host 0.0.0.0 --port 8000"]
|
README.md
CHANGED
|
@@ -1,10 +1,199 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
---
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: LogiFlow-RL
|
| 3 |
+
emoji: 🚚
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: green
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
+
app_port: 8000
|
| 9 |
+
base_path: /web
|
| 10 |
+
tags:
|
| 11 |
+
- openenv
|
| 12 |
+
- logistics
|
| 13 |
+
- reinforcement-learning
|
| 14 |
---
|
| 15 |
|
| 16 |
+
# LogiFlow-RL
|
| 17 |
+
|
| 18 |
+
## Overview
|
| 19 |
+
|
| 20 |
+
LogiFlow-RL is an OpenEnv environment for dynamic supply-chain workload balancing. The agent acts as a regional routing controller that must assign each incoming shipment to one of three hubs while keeping the network stable under predictable demand, flash-sale bursts, and cascading disruptions.
|
| 21 |
+
|
| 22 |
+
This is a real-world task rather than a toy game: the agent is learning a simplified version of freight routing and capacity balancing, which are core operations in modern logistics networks.
|
| 23 |
+
|
| 24 |
+
## Why this environment is useful
|
| 25 |
+
|
| 26 |
+
Static routing rules such as round robin often fail under bursty demand because they react too slowly to overload risk. LogiFlow-RL provides a reproducible environment for training and evaluating routing agents that must reason about load, capacity, drift, and operational resilience.
|
| 27 |
+
|
| 28 |
+
## Action Space
|
| 29 |
+
|
| 30 |
+
The action is a typed Pydantic model:
|
| 31 |
+
|
| 32 |
+
- `target_hub: int`
|
| 33 |
+
- `0` route shipment to Hub A
|
| 34 |
+
- `1` route shipment to Hub B
|
| 35 |
+
- `2` route shipment to Hub C
|
| 36 |
+
|
| 37 |
+
## Observation Space
|
| 38 |
+
|
| 39 |
+
The observation is a typed Pydantic model containing:
|
| 40 |
+
|
| 41 |
+
- `task_id`: active benchmark task
|
| 42 |
+
- `difficulty`: easy, medium, or hard
|
| 43 |
+
- `objective`: natural-language task goal
|
| 44 |
+
- `hub_loads`: current utilization for the 3 hubs
|
| 45 |
+
- `drain_rates`: per-step clearing rates for non-selected hubs
|
| 46 |
+
- `incoming_load`: next scheduled shipment size
|
| 47 |
+
- `step_count` and `max_steps`
|
| 48 |
+
- `overloaded_hubs`
|
| 49 |
+
- `cumulative_score`: normalized task score in `[0.0, 1.0]`
|
| 50 |
+
- `last_reward`: shaped reward for the previous action in `[0.0, 1.0]`
|
| 51 |
+
- `event_label`: normal, flash_sale, weather_disruption, or completed
|
| 52 |
+
|
| 53 |
+
## Reward Design
|
| 54 |
+
|
| 55 |
+
The environment provides dense reward over the full trajectory:
|
| 56 |
+
|
| 57 |
+
- Positive reward when the selected hub stays in the optimal 30-70 utilization zone
|
| 58 |
+
- Penalties when the chosen hub drifts too far from the center of the target range
|
| 59 |
+
- Penalties for global imbalance across hubs
|
| 60 |
+
- Strong penalty when a routing decision causes overload above 100 utilization
|
| 61 |
+
|
| 62 |
+
Each step reward is normalized to `[0.0, 1.0]`.
|
| 63 |
+
|
| 64 |
+
## Tasks and Graders
|
| 65 |
+
|
| 66 |
+
The environment ships with three deterministic benchmark tasks and programmatic graders.
|
| 67 |
+
|
| 68 |
+
### Easy: `easy`
|
| 69 |
+
|
| 70 |
+
Title: Steady-State Rebalancing
|
| 71 |
+
|
| 72 |
+
Objective: Keep all hubs in the 30-70 utilization band during predictable daytime demand.
|
| 73 |
+
|
| 74 |
+
Expected challenge: The agent should quickly learn basic balancing behavior and avoid unnecessary skew.
|
| 75 |
+
|
| 76 |
+
### Medium: `medium`
|
| 77 |
+
|
| 78 |
+
Title: Flash Sale Containment
|
| 79 |
+
|
| 80 |
+
Objective: Absorb an afternoon flash-sale surge without letting any single hub overload.
|
| 81 |
+
|
| 82 |
+
Expected challenge: The agent must react to bursty schedules and preserve balance under transient spikes.
|
| 83 |
+
|
| 84 |
+
### Hard: `hard`
|
| 85 |
+
|
| 86 |
+
Title: Cascading Disruption Recovery
|
| 87 |
+
|
| 88 |
+
Objective: Stabilize the network through repeated surge waves and weather disruptions while preserving throughput.
|
| 89 |
+
|
| 90 |
+
Expected challenge: The agent must recover from repeated shocks and still maintain usable balance.
|
| 91 |
+
|
| 92 |
+
### Grader behavior
|
| 93 |
+
|
| 94 |
+
Each task has a deterministic grader that returns a final score in `[0.0, 1.0]` based on:
|
| 95 |
+
|
| 96 |
+
- Bottleneck avoidance
|
| 97 |
+
- Average inter-hub balance gap
|
| 98 |
+
- Average trajectory reward
|
| 99 |
+
- Fraction of steps spent in the optimal operating zone
|
| 100 |
+
|
| 101 |
+
The grader implementation is in [graders.py](graders.py).
|
| 102 |
+
|
| 103 |
+
## Baselines
|
| 104 |
+
|
| 105 |
+
The repo includes two reproducible local baselines:
|
| 106 |
+
|
| 107 |
+
- `round_robin`
|
| 108 |
+
- `heuristic`
|
| 109 |
+
|
| 110 |
+
Run:
|
| 111 |
+
|
| 112 |
+
```bash
|
| 113 |
+
python train_and_evaluate.py
|
| 114 |
+
```
|
| 115 |
+
|
| 116 |
+
Current local benchmark scores:
|
| 117 |
+
|
| 118 |
+
- `round_robin`
|
| 119 |
+
- easy: `0.800`
|
| 120 |
+
- medium: `0.886`
|
| 121 |
+
- hard: `0.863`
|
| 122 |
+
- `heuristic`
|
| 123 |
+
- easy: `0.800`
|
| 124 |
+
- medium: `0.957`
|
| 125 |
+
- hard: `0.900`
|
| 126 |
+
|
| 127 |
+
The submission baseline entrypoint is [inference.py](inference.py), which uses the OpenAI client and emits the required structured logs.
|
| 128 |
+
|
| 129 |
+
## Project Structure
|
| 130 |
+
|
| 131 |
+
```text
|
| 132 |
+
crisis_logistics_env/
|
| 133 |
+
├── __init__.py
|
| 134 |
+
├── client.py
|
| 135 |
+
├── graders.py
|
| 136 |
+
├── gym_env.py
|
| 137 |
+
├── models.py
|
| 138 |
+
├── openenv.yaml
|
| 139 |
+
├── pyproject.toml
|
| 140 |
+
├── README.md
|
| 141 |
+
├── tasks.py
|
| 142 |
+
├── test_engine.py
|
| 143 |
+
├── train_and_evaluate.py
|
| 144 |
+
└── server/
|
| 145 |
+
├── app.py
|
| 146 |
+
├── crisis_logistics_env_environment.py
|
| 147 |
+
└── Dockerfile
|
| 148 |
+
```
|
| 149 |
+
|
| 150 |
+
## Setup
|
| 151 |
+
|
| 152 |
+
### Local run
|
| 153 |
+
|
| 154 |
+
```bash
|
| 155 |
+
python test_engine.py
|
| 156 |
+
python train_and_evaluate.py
|
| 157 |
+
```
|
| 158 |
+
|
| 159 |
+
### Baseline inference
|
| 160 |
+
|
| 161 |
+
Set the required environment variables:
|
| 162 |
+
|
| 163 |
+
- `API_BASE_URL`
|
| 164 |
+
- `MODEL_NAME`
|
| 165 |
+
- `HF_TOKEN` or `OPENAI_API_KEY`
|
| 166 |
+
|
| 167 |
+
Then run from the repository root:
|
| 168 |
+
|
| 169 |
+
```bash
|
| 170 |
+
python inference.py
|
| 171 |
+
```
|
| 172 |
+
|
| 173 |
+
### Local server
|
| 174 |
+
|
| 175 |
+
```bash
|
| 176 |
+
uvicorn server.app:app --host 0.0.0.0 --port 8000
|
| 177 |
+
```
|
| 178 |
+
|
| 179 |
+
### Docker
|
| 180 |
+
|
| 181 |
+
```bash
|
| 182 |
+
docker build -t logiflow-rl -f server/Dockerfile .
|
| 183 |
+
docker run -p 8000:8000 logiflow-rl
|
| 184 |
+
```
|
| 185 |
+
|
| 186 |
+
## Validation Notes
|
| 187 |
+
|
| 188 |
+
The environment includes:
|
| 189 |
+
|
| 190 |
+
- Typed action, observation, and state models
|
| 191 |
+
- Deterministic tasks with increasing difficulty
|
| 192 |
+
- Programmatic graders with scores in `[0.0, 1.0]`
|
| 193 |
+
- Dense partial-progress reward shaping
|
| 194 |
+
- Root-level `inference.py`
|
| 195 |
+
- Dockerfile and OpenEnv manifest
|
| 196 |
+
|
| 197 |
+
## Submission One-Liner
|
| 198 |
+
|
| 199 |
+
LogiFlow-RL is an OpenEnv benchmark for dynamic freight routing where an agent must balance shipments across regional hubs under flash-sale and disruption scenarios, with deterministic tasks and normalized graders for reproducible evaluation.
|
__init__.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""Crisis Logistics Env Environment."""
|
| 8 |
+
|
| 9 |
+
from .client import CrisisLogisticsEnv
|
| 10 |
+
from .graders import EpisodeMetrics, grade_episode
|
| 11 |
+
from .gym_env import LogiFlowGymEnv
|
| 12 |
+
from .models import CrisisLogisticsAction, CrisisLogisticsObservation, CrisisLogisticsState
|
| 13 |
+
from .tasks import get_task, list_tasks
|
| 14 |
+
|
| 15 |
+
__all__ = [
|
| 16 |
+
"CrisisLogisticsAction",
|
| 17 |
+
"CrisisLogisticsObservation",
|
| 18 |
+
"CrisisLogisticsState",
|
| 19 |
+
"CrisisLogisticsEnv",
|
| 20 |
+
"LogiFlowGymEnv",
|
| 21 |
+
"EpisodeMetrics",
|
| 22 |
+
"grade_episode",
|
| 23 |
+
"get_task",
|
| 24 |
+
"list_tasks",
|
| 25 |
+
]
|
client.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""Client for the LogiFlow-RL environment server."""
|
| 8 |
+
|
| 9 |
+
from typing import Dict
|
| 10 |
+
|
| 11 |
+
from openenv.core import EnvClient
|
| 12 |
+
from openenv.core.client_types import StepResult
|
| 13 |
+
from openenv.core.env_server.types import State
|
| 14 |
+
|
| 15 |
+
from .models import CrisisLogisticsAction, CrisisLogisticsObservation, CrisisLogisticsState
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class CrisisLogisticsEnv(
    EnvClient[CrisisLogisticsAction, CrisisLogisticsObservation, CrisisLogisticsState]
):
    """Thin client that talks to the HTTP or WebSocket server."""

    def _step_payload(self, action: CrisisLogisticsAction) -> Dict:
        """Serialize the action into the wire format expected by the server."""
        return {"target_hub": action.target_hub}

    def _parse_result(self, payload: Dict) -> StepResult[CrisisLogisticsObservation]:
        """Rebuild a typed observation and step result from the server payload."""
        raw = payload.get("observation", {})
        # Server field -> client-side fallback. The dict (and its container
        # values) are created fresh on every call so defaults are never
        # shared between results.
        fallbacks = {
            "task_id": "easy",
            "difficulty": "easy",
            "objective": "",
            "hub_loads": [0.0, 0.0, 0.0],
            "drain_rates": [6.0, 5.0, 4.0],
            "incoming_load": 0.0,
            "step_count": 0,
            "max_steps": 100,
            "overloaded_hubs": 0,
            "cumulative_score": 0.0,
            "last_reward": 0.0,
            "event_label": "normal",
            "message": "",
            "metadata": {},
        }
        fields = {name: raw.get(name, default) for name, default in fallbacks.items()}

        reward = payload.get("reward")
        done = payload.get("done", False)
        observation = CrisisLogisticsObservation(reward=reward, done=done, **fields)

        return StepResult(observation=observation, reward=reward, done=done)

    def _parse_state(self, payload: Dict) -> CrisisLogisticsState:
        """Rebuild the typed episode state from the server payload."""
        return CrisisLogisticsState(
            episode_id=payload.get("episode_id"),
            step_count=payload.get("step_count", 0),
            task_id=payload.get("task_id", "easy"),
            difficulty=payload.get("difficulty", "easy"),
            hub_loads=payload.get("hub_loads", [0.0, 0.0, 0.0]),
            incoming_index=payload.get("incoming_index", 0),
            bottlenecks=payload.get("bottlenecks", 0),
            score=payload.get("score", 0.0),
        )
|
graders.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations

from dataclasses import dataclass
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # TaskConfig is used only as a type annotation (annotations are lazy via
    # the __future__ import above), so the runtime import is unnecessary.
    # The previous try/except fallback still raised ImportError when neither
    # package layout matched; the TYPE_CHECKING guard makes this module
    # importable regardless of how it is executed.
    from .tasks import TaskConfig


@dataclass
class EpisodeMetrics:
    """Aggregate statistics collected over one episode, consumed by the grader."""

    total_reward: float         # sum of per-step rewards
    average_reward: float       # mean per-step reward
    bottlenecks: int            # steps on which an overload occurred
    optimal_steps: int          # steps spent in the optimal operating zone
    average_balance_gap: float  # mean (max - min) load spread across hubs
    throughput_served: float    # total shipment load routed
    steps_completed: int        # episode length actually played


def grade_episode(task: TaskConfig, metrics: EpisodeMetrics) -> float:
    """Return a deterministic final score in [0.0, 1.0] for one episode.

    The score is a weighted blend of four components, each clamped to
    [0, 1]: bottleneck avoidance (0.35), inter-hub balance (0.25),
    reward efficiency (0.20), and time in the optimal zone (0.20).
    The result is rounded to 3 decimals.
    """
    # Penalize bottlenecks beyond the task's allowance, scaled by a quarter
    # of the episode length so longer episodes are not graded more harshly.
    bottleneck_score = max(
        0.0,
        1.0 - max(0, metrics.bottlenecks - task.target_bottlenecks) / max(1, task.max_steps / 4),
    )
    # Penalize average load spread beyond the target gap; 40 units of excess
    # gap zeroes this component.
    balance_score = max(
        0.0,
        1.0 - max(0.0, metrics.average_balance_gap - task.target_balance_gap) / 40.0,
    )
    # Reward efficiency relative to the task's minimum expected average
    # reward; the 0.01 floor guards against division by zero.
    efficiency_score = min(1.0, metrics.average_reward / max(task.minimum_avg_reward, 0.01))
    # Fraction of the episode spent in the optimal operating zone.
    stability_score = metrics.optimal_steps / task.max_steps

    final_score = (
        0.35 * bottleneck_score
        + 0.25 * balance_score
        + 0.20 * efficiency_score
        + 0.20 * stability_score
    )
    return round(max(0.0, min(1.0, final_score)), 3)
|
gym_env.py
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import random
|
| 4 |
+
from typing import Any
|
| 5 |
+
|
| 6 |
+
import gymnasium as gym
|
| 7 |
+
import numpy as np
|
| 8 |
+
from gymnasium import spaces
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class LogiFlowGymEnv(gym.Env):
    """Gymnasium wrapper for the 3-hub logistics balancing problem.

    Each step the agent routes the pending shipment to one of three hubs
    (``Discrete(3)``); the two non-selected hubs drain backlog at fixed
    rates. The observation is an 8-vector: three hub loads, three drain
    rates, the incoming shipment size, and a numeric traffic-event id.

    Fix over the previous version: all randomness now flows through
    ``self.np_random`` (seeded by ``super().reset(seed=...)``) instead of
    the module-level ``random`` generator. The old code called
    ``random.seed(seed)``, which mutated the process-global RNG and did
    not make this environment reproducible via Gymnasium's seeding API.
    """

    metadata = {"render_modes": ["human"], "render_fps": 4}

    def __init__(self, max_steps: int = 100):
        super().__init__()
        self.max_steps = max_steps
        # Load cleared per step by each hub while it is NOT receiving.
        self.base_drain_rates = np.array([8.0, 7.0, 6.0], dtype=np.float32)
        self.action_space = spaces.Discrete(3)
        self.observation_space = spaces.Box(
            low=np.array([0, 0, 0, 0, 0, 0, 0, 0], dtype=np.float32),
            high=np.array([150, 150, 150, 20, 20, 20, 40, 2], dtype=np.float32),
            dtype=np.float32,
        )
        self.reset()

    def reset(
        self, *, seed: int | None = None, options: dict[str, Any] | None = None
    ) -> tuple[np.ndarray, dict[str, Any]]:
        """Reset to the fixed initial load profile; return (observation, info)."""
        # Seeds self.np_random; deliberately leaves the global `random`
        # module state untouched.
        super().reset(seed=seed)

        self.hub_loads = np.array([24.0, 36.0, 30.0], dtype=np.float32)
        self.drain_rates = self.base_drain_rates.copy()
        self.step_count = 0
        self.event_label = "normal"
        self.incoming_load = self._sample_incoming_load()
        return self._get_obs(), self._get_info(0.0)

    def step(self, action: int):
        """Route the pending shipment to hub `action`; return the 5-tuple API."""
        self.step_count += 1

        # Non-selected hubs clear part of their backlog.
        for idx in range(3):
            if idx != action:
                self.hub_loads[idx] = max(0.0, self.hub_loads[idx] - self.drain_rates[idx])

        self.hub_loads[action] += self.incoming_load
        reward = self._calculate_reward(action)

        # Sample the next step's traffic event and shipment size.
        self.event_label = self._sample_event_label()
        self.incoming_load = self._sample_incoming_load(self.event_label)

        terminated = False  # episodes only end via truncation at max_steps
        truncated = self.step_count >= self.max_steps
        return self._get_obs(), reward, terminated, truncated, self._get_info(reward)

    def render(self):
        """Print a one-line human-readable dump of the current state."""
        print(
            f"step={self.step_count} loads={self.hub_loads.round(1).tolist()} "
            f"incoming={self.incoming_load:.1f} event={self.event_label}"
        )

    def _get_obs(self) -> np.ndarray:
        """Pack loads, drain rates, incoming load, and event id into a float32 vector."""
        event_id = {"normal": 0.0, "weather_disruption": 1.0, "flash_sale": 2.0}[self.event_label]
        return np.array(
            [
                self.hub_loads[0],
                self.hub_loads[1],
                self.hub_loads[2],
                self.drain_rates[0],
                self.drain_rates[1],
                self.drain_rates[2],
                self.incoming_load,
                event_id,
            ],
            dtype=np.float32,
        )

    def _get_info(self, reward: float) -> dict[str, Any]:
        """Auxiliary diagnostics returned alongside each observation."""
        return {
            "reward": reward,
            "overloaded_hubs": int(np.sum(self.hub_loads > 100.0)),
            "event_label": self.event_label,
        }

    def _sample_event_label(self) -> str:
        """15% flash sale, 10% weather disruption, otherwise normal traffic."""
        roll = float(self.np_random.random())
        if roll < 0.15:
            return "flash_sale"
        if roll < 0.25:
            return "weather_disruption"
        return "normal"

    def _sample_incoming_load(self, event_label: str | None = None) -> float:
        """Draw the next shipment size from the event-conditional uniform range."""
        label = event_label or self.event_label
        if label == "flash_sale":
            return float(self.np_random.uniform(16.0, 24.0))
        if label == "weather_disruption":
            return float(self.np_random.uniform(11.0, 18.0))
        return float(self.np_random.uniform(6.0, 12.0))

    def _calculate_reward(self, action: int) -> float:
        """Shaped reward: in-band bonus, distance penalty, overload and imbalance penalties."""
        reward = 0.5
        target_load = self.hub_loads[action]

        # Bonus for keeping the receiving hub in the 30-70 utilization band;
        # otherwise penalize distance from the band's center (capped at 4).
        if 30.0 <= target_load <= 70.0:
            reward += 5.0
        else:
            reward -= min(abs(target_load - 50.0) / 15.0, 4.0)

        # Strong penalty when the routing decision itself causes an overload.
        if target_load > 100.0:
            reward -= 20.0

        # Global penalties: spread between busiest and quietest hub, plus a
        # flat charge per overloaded hub.
        reward -= float(np.max(self.hub_loads) - np.min(self.hub_loads)) / 50.0
        reward -= float(np.sum(self.hub_loads > 100.0)) * 3.0
        return round(reward, 2)
|
inference.py
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from typing import List, Optional
|
| 3 |
+
|
| 4 |
+
from openai import OpenAI
|
| 5 |
+
|
| 6 |
+
from crisis_logistics_env import CrisisLogisticsAction
|
| 7 |
+
from crisis_logistics_env.server.crisis_logistics_env_environment import (
|
| 8 |
+
CrisisLogisticsEnvironment,
|
| 9 |
+
choose_balancing_action,
|
| 10 |
+
)
|
| 11 |
+
from crisis_logistics_env.tasks import list_tasks
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
API_KEY = os.getenv("HF_TOKEN") or os.getenv("OPENAI_API_KEY") or os.getenv("API_KEY")
|
| 15 |
+
API_BASE_URL = os.getenv("API_BASE_URL") or "https://router.huggingface.co/v1"
|
| 16 |
+
MODEL_NAME = os.getenv("MODEL_NAME") or "Qwen/Qwen2.5-72B-Instruct"
|
| 17 |
+
BENCHMARK = os.getenv("BENCHMARK") or "logiflow_rl"
|
| 18 |
+
MAX_STEPS_OVERRIDE = os.getenv("MAX_STEPS")
|
| 19 |
+
|
| 20 |
+
SYSTEM_PROMPT = (
|
| 21 |
+
"You are controlling a logistics routing environment with 3 hubs. "
|
| 22 |
+
"Reply with exactly one digit: 0, 1, or 2. "
|
| 23 |
+
"Choose the hub that best keeps the network balanced, avoids overload above 100, "
|
| 24 |
+
"and keeps hubs near the 30-70 utilization band."
|
| 25 |
+
)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def log_start(task: str, env: str, model: str) -> None:
    """Emit the structured [START] marker for one benchmark run."""
    print(f"[START] task={task} env={env} model={model}", flush=True)


def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]) -> None:
    """Emit the structured [STEP] marker; an absent error is logged as 'null'."""
    parts = (
        f"[STEP] step={step}",
        f"action={action}",
        f"reward={reward:.2f}",
        f"done={str(done).lower()}",
        f"error={error or 'null'}",
    )
    print(" ".join(parts), flush=True)


def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
    """Emit the structured [END] marker with the comma-joined reward trace."""
    trace = ",".join(format(r, ".2f") for r in rewards)
    print(
        f"[END] success={str(success).lower()} steps={steps} score={score:.3f} rewards={trace}",
        flush=True,
    )
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def build_user_prompt(task_title: str, objective: str, step: int, hub_loads: List[float], incoming_load: float, event_label: str, score: float) -> str:
    """Render the per-step user prompt shown to the routing model."""
    lines = [
        f"Task: {task_title}",
        f"Objective: {objective}",
        f"Step: {step}",
        f"Hub loads: {hub_loads}",
        f"Incoming shipment: {incoming_load}",
        f"Traffic event: {event_label}",
        f"Current score: {score:.3f}",
        "Return only one hub id: 0, 1, or 2.",
    ]
    return "\n".join(lines)
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def choose_action_with_model(client: OpenAI, prompt: str) -> int:
    """Ask the model for a hub id; raise ValueError on any reply not starting with 0/1/2."""
    response = client.chat.completions.create(
        model=MODEL_NAME,
        temperature=0.0,
        max_tokens=4,
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": prompt},
        ],
    )
    raw = response.choices[0].message.content
    text = (raw or "").strip()
    first = text[:1]
    if first in ("0", "1", "2"):
        return int(first)
    # Surface the raw reply so bad outputs are easy to diagnose in logs.
    raise ValueError(f"invalid_model_output:{text}")
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def run_task(task_id: str, client: Optional[OpenAI]) -> float:
    """Play one benchmark task to completion and return its final score.

    The local heuristic (`choose_balancing_action`) is the default policy;
    when an OpenAI-compatible `client` is supplied, the model's reply
    overrides the heuristic for that step. Model failures are recorded in
    the step log (error=<msg>) but never abort the episode — the heuristic
    action is used instead. Emits the structured [START]/[STEP]/[END]
    log lines expected by the benchmark harness.
    """
    env = CrisisLogisticsEnvironment()
    observation = env.reset(task_id=task_id)
    rewards: List[float] = []
    last_error: Optional[str] = None
    # The MAX_STEPS env var can only shorten an episode, never lengthen it
    # beyond the task's own limit.
    max_steps = min(
        env.task.max_steps,
        int(MAX_STEPS_OVERRIDE) if MAX_STEPS_OVERRIDE else env.task.max_steps,
    )

    log_start(task_id, BENCHMARK, MODEL_NAME)

    try:
        while not observation.done and observation.step_count < max_steps:
            # Heuristic fallback, possibly overridden by the model below.
            action_value = choose_balancing_action(observation)
            if client is not None:
                prompt = build_user_prompt(
                    env.task.title,
                    observation.objective,
                    observation.step_count + 1,
                    observation.hub_loads,
                    observation.incoming_load,
                    observation.event_label,
                    observation.cumulative_score,
                )
                try:
                    action_value = choose_action_with_model(client, prompt)
                    last_error = None
                except Exception as exc:
                    # Best effort: keep the heuristic action, surface the
                    # failure in this step's log line.
                    last_error = str(exc)

            action = CrisisLogisticsAction(target_hub=action_value)
            observation = env.step(action)
            reward = float(observation.reward or 0.0)
            rewards.append(reward)
            log_step(
                step=observation.step_count,
                action=f"route({action_value})",
                reward=reward,
                done=observation.done,
                error=last_error,
            )

        final_score = observation.cumulative_score
        success = final_score >= 0.65  # benchmark success threshold
        return_score = final_score
        log_end(success, observation.step_count, return_score, rewards)
        return return_score
    except Exception:
        # Environment/transport failure: emit a failing [END] record so the
        # harness still sees a terminal line, then re-raise.
        log_end(False, observation.step_count, 0.0, rewards)
        raise
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
def main() -> None:
    """Run every benchmark task once, attaching an LLM client when credentials exist."""
    if API_KEY:
        client = OpenAI(api_key=API_KEY, base_url=API_BASE_URL)
    else:
        client = None
    for task in list_tasks():
        run_task(task.task_id, client)


if __name__ == "__main__":
    main()
|
models.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""Data models for the LogiFlow-RL environment."""
|
| 8 |
+
|
| 9 |
+
from typing import List
|
| 10 |
+
|
| 11 |
+
from openenv.core.env_server.types import Action, Observation, State
|
| 12 |
+
from pydantic import Field
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class CrisisLogisticsAction(Action):
    """Route the next shipment to one of the available hubs."""

    # The network always has exactly three hubs, so valid indices are 0-2;
    # pydantic rejects anything outside that range before it reaches the env.
    target_hub: int = Field(
        ...,
        ge=0,
        le=2,
        description="Index of the hub that should receive the incoming shipment: 0, 1, or 2.",
    )
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class CrisisLogisticsObservation(Observation):
    """Current state of the regional hub network.

    Every field carries a default so the model can be deserialized from a
    partial payload; the server populates all fields explicitly on each step.
    """

    task_id: str = Field(default="easy", description="Active benchmark task identifier.")
    difficulty: str = Field(default="easy", description="Difficulty label for the active task.")
    objective: str = Field(default="", description="Task objective visible to the agent.")
    # Utilization percentages; values may exceed 100 when a hub is overloaded.
    hub_loads: List[float] = Field(
        default_factory=lambda: [0.0, 0.0, 0.0],
        description="Current utilization percentage for each hub.",
    )
    drain_rates: List[float] = Field(
        default_factory=lambda: [6.0, 5.0, 4.0],
        description="How much load each hub clears per timestep when not selected.",
    )
    # Set to 0.0 once the episode is done (no further shipment to assign).
    incoming_load: float = Field(
        default=0.0,
        description="Load percentage of the shipment that must be assigned this step.",
    )
    step_count: int = Field(default=0, description="Current step in the episode.")
    max_steps: int = Field(default=100, description="Maximum episode length.")
    overloaded_hubs: int = Field(
        default=0,
        description="Number of hubs currently above 100 percent utilization.",
    )
    cumulative_score: float = Field(
        default=0.0,
        description="Normalized score in the range [0.0, 1.0] for progress so far in the episode.",
    )
    last_reward: float = Field(default=0.0, description="Reward from the previous action.")
    # Reads "completed" after the final step of the episode.
    event_label: str = Field(
        default="normal",
        description="Traffic condition for the current shipment, e.g. normal or flash_sale.",
    )
    message: str = Field(default="", description="Human-readable state summary.")
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
class CrisisLogisticsState(State):
    """Internal environment state exposed for validation and debugging.

    Served by the ``/state`` endpoint; mirrors the environment's raw counters
    rather than the agent-facing observation.
    """

    task_id: str = Field(default="easy", description="Active task identifier.")
    difficulty: str = Field(default="easy", description="Task difficulty label.")
    hub_loads: List[float] = Field(
        default_factory=lambda: [0.0, 0.0, 0.0],
        description="Current utilization values for each hub.",
    )
    incoming_index: int = Field(default=0, description="Index of the next scheduled shipment.")
    bottlenecks: int = Field(default=0, description="Total bottlenecks encountered this episode.")
    score: float = Field(default=0.0, description="Current normalized task score.")
|
openenv.yaml
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
spec_version: 1
|
| 2 |
+
name: logiflow_rl
|
| 3 |
+
type: space
|
| 4 |
+
runtime: fastapi
|
| 5 |
+
app: server.app:app
|
| 6 |
+
port: 8000
|
openenv_crisis_logistics_env.egg-info/PKG-INFO
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Metadata-Version: 2.4
|
| 2 |
+
Name: openenv-crisis_logistics_env
|
| 3 |
+
Version: 0.1.0
|
| 4 |
+
Summary: LogiFlow-RL environment for OpenEnv
|
| 5 |
+
Requires-Python: >=3.10
|
| 6 |
+
Requires-Dist: openenv-core[core]>=0.2.2
|
| 7 |
+
Requires-Dist: numpy>=1.26.0
|
| 8 |
+
Requires-Dist: gymnasium>=0.29.0
|
| 9 |
+
Requires-Dist: openai>=2.0.0
|
| 10 |
+
Provides-Extra: dev
|
| 11 |
+
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
| 12 |
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
openenv_crisis_logistics_env.egg-info/SOURCES.txt
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
README.md
|
| 2 |
+
pyproject.toml
|
| 3 |
+
./__init__.py
|
| 4 |
+
./client.py
|
| 5 |
+
./graders.py
|
| 6 |
+
./gym_env.py
|
| 7 |
+
./models.py
|
| 8 |
+
./tasks.py
|
| 9 |
+
./test_engine.py
|
| 10 |
+
./train_and_evaluate.py
|
| 11 |
+
openenv_crisis_logistics_env.egg-info/PKG-INFO
|
| 12 |
+
openenv_crisis_logistics_env.egg-info/SOURCES.txt
|
| 13 |
+
openenv_crisis_logistics_env.egg-info/dependency_links.txt
|
| 14 |
+
openenv_crisis_logistics_env.egg-info/entry_points.txt
|
| 15 |
+
openenv_crisis_logistics_env.egg-info/requires.txt
|
| 16 |
+
openenv_crisis_logistics_env.egg-info/top_level.txt
|
| 17 |
+
server/__init__.py
|
| 18 |
+
server/app.py
|
| 19 |
+
server/crisis_logistics_env_environment.py
|
openenv_crisis_logistics_env.egg-info/dependency_links.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
openenv_crisis_logistics_env.egg-info/entry_points.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[console_scripts]
|
| 2 |
+
server = crisis_logistics_env.server.app:main
|
openenv_crisis_logistics_env.egg-info/requires.txt
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
openenv-core[core]>=0.2.2
|
| 2 |
+
numpy>=1.26.0
|
| 3 |
+
gymnasium>=0.29.0
|
| 4 |
+
openai>=2.0.0
|
| 5 |
+
|
| 6 |
+
[dev]
|
| 7 |
+
pytest>=8.0.0
|
| 8 |
+
pytest-cov>=4.0.0
|
openenv_crisis_logistics_env.egg-info/top_level.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
crisis_logistics_env
|
pyproject.toml
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
[build-system]
|
| 8 |
+
requires = ["setuptools>=45", "wheel"]
|
| 9 |
+
build-backend = "setuptools.build_meta"
|
| 10 |
+
|
| 11 |
+
[project]
|
| 12 |
+
name = "openenv-crisis_logistics_env"
|
| 13 |
+
version = "0.1.0"
|
| 14 |
+
description = "LogiFlow-RL environment for OpenEnv"
|
| 15 |
+
requires-python = ">=3.10"
|
| 16 |
+
dependencies = [
|
| 17 |
+
# Core OpenEnv runtime (provides FastAPI server + HTTP client types)
|
| 18 |
+
# install from github
|
| 19 |
+
# "openenv-core[core] @ git+https://github.com/meta-pytorch/OpenEnv.git",
|
| 20 |
+
"openenv-core[core]>=0.2.2",
|
| 21 |
+
# Environment-specific dependencies
|
| 22 |
+
# Add all dependencies needed for your environment here
|
| 23 |
+
# Examples:
|
| 24 |
+
"numpy>=1.26.0",
|
| 25 |
+
"gymnasium>=0.29.0",
|
| 26 |
+
"openai>=2.0.0",
|
| 27 |
+
# "torch>=2.0.0",
|
| 28 |
+
# "gymnasium>=0.29.0",
|
| 29 |
+
# "openspiel>=1.0.0",
|
| 30 |
+
# "smolagents>=1.22.0,<2",
|
| 31 |
+
]
|
| 32 |
+
|
| 33 |
+
[project.optional-dependencies]
|
| 34 |
+
dev = [
|
| 35 |
+
"pytest>=8.0.0",
|
| 36 |
+
"pytest-cov>=4.0.0",
|
| 37 |
+
]
|
| 38 |
+
|
| 39 |
+
[project.scripts]
|
| 40 |
+
# Server entry point - enables running via: uv run --project . server
|
| 41 |
+
# or: python -m crisis_logistics_env.server.app
|
| 42 |
+
server = "crisis_logistics_env.server.app:main"
|
| 43 |
+
|
| 44 |
+
[tool.setuptools]
|
| 45 |
+
include-package-data = true
|
| 46 |
+
packages = ["crisis_logistics_env", "crisis_logistics_env.server"]
|
| 47 |
+
package-dir = { "crisis_logistics_env" = ".", "crisis_logistics_env.server" = "server" }
|
server/__init__.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""Crisis Logistics Env environment server components."""
|
| 8 |
+
|
| 9 |
+
from .crisis_logistics_env_environment import CrisisLogisticsEnvironment
|
| 10 |
+
|
| 11 |
+
__all__ = ["CrisisLogisticsEnvironment"]
|
server/app.py
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""FastAPI application for the LogiFlow-RL OpenEnv environment."""
|
| 8 |
+
|
| 9 |
+
from fastapi import FastAPI
|
| 10 |
+
|
| 11 |
+
try:
|
| 12 |
+
from openenv.core.env_server.types import (
|
| 13 |
+
EnvironmentMetadata,
|
| 14 |
+
HealthResponse,
|
| 15 |
+
HealthStatus,
|
| 16 |
+
ResetRequest,
|
| 17 |
+
ResetResponse,
|
| 18 |
+
SchemaResponse,
|
| 19 |
+
StepRequest,
|
| 20 |
+
StepResponse,
|
| 21 |
+
)
|
| 22 |
+
from ..models import (
|
| 23 |
+
CrisisLogisticsAction,
|
| 24 |
+
CrisisLogisticsObservation,
|
| 25 |
+
CrisisLogisticsState,
|
| 26 |
+
)
|
| 27 |
+
from .crisis_logistics_env_environment import CrisisLogisticsEnvironment
|
| 28 |
+
except ImportError:
|
| 29 |
+
from openenv.core.env_server.types import (
|
| 30 |
+
EnvironmentMetadata,
|
| 31 |
+
HealthResponse,
|
| 32 |
+
HealthStatus,
|
| 33 |
+
ResetRequest,
|
| 34 |
+
ResetResponse,
|
| 35 |
+
SchemaResponse,
|
| 36 |
+
StepRequest,
|
| 37 |
+
StepResponse,
|
| 38 |
+
)
|
| 39 |
+
from models import (
|
| 40 |
+
CrisisLogisticsAction,
|
| 41 |
+
CrisisLogisticsObservation,
|
| 42 |
+
CrisisLogisticsState,
|
| 43 |
+
)
|
| 44 |
+
from server.crisis_logistics_env_environment import CrisisLogisticsEnvironment
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
app = FastAPI(
|
| 48 |
+
title="OpenEnv Environment HTTP API",
|
| 49 |
+
version="1.0.0",
|
| 50 |
+
description=(
|
| 51 |
+
"HTTP API for interacting with the LogiFlow-RL environment through "
|
| 52 |
+
"a standardized OpenEnv-style interface."
|
| 53 |
+
),
|
| 54 |
+
)
|
| 55 |
+
|
| 56 |
+
env = CrisisLogisticsEnvironment()
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
@app.post("/reset", response_model=ResetResponse, tags=["Environment Control"])
async def reset_environment(request: ResetRequest) -> ResetResponse:
    """Start a new episode, optionally selecting a benchmark task.

    ``task_id`` is read defensively because the base ``ResetRequest`` model
    may not declare it; missing or empty values fall back to the easy task.
    """
    task_id = getattr(request, "task_id", None) or "easy"
    observation = env.reset(seed=request.seed, episode_id=request.episode_id, task_id=task_id)
    return ResetResponse(
        observation=observation.model_dump(),
        reward=float(observation.reward or 0.0),
        done=observation.done,
    )
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
@app.post("/step", response_model=StepResponse, tags=["Environment Control"])
async def step_environment(request: StepRequest) -> StepResponse:
    """Apply one routing action and return the resulting observation."""
    # Re-validates the raw payload (target_hub must be 0-2) via the pydantic model.
    action = CrisisLogisticsAction(**request.action)
    observation = env.step(action, timeout_s=request.timeout_s)
    return StepResponse(
        observation=observation.model_dump(),
        reward=float(observation.reward or 0.0),
        done=observation.done,
    )
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
@app.get("/state", response_model=CrisisLogisticsState, tags=["State Management"])
async def get_state() -> CrisisLogisticsState:
    """Expose the environment's internal counters for validation and debugging."""
    return env.state
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
@app.get("/metadata", response_model=EnvironmentMetadata, tags=["Environment Info"])
async def get_metadata() -> EnvironmentMetadata:
    """Return static, human-readable information about this environment."""
    return EnvironmentMetadata(
        name="LogiFlow-RL",
        description="Deterministic logistics routing benchmark with easy, medium, and hard tasks.",
        version="1.0.0",
    )
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
@app.get("/schema", response_model=SchemaResponse, tags=["Schema"])
async def get_schema() -> SchemaResponse:
    """Publish JSON Schemas for the action, observation, and state models."""
    return SchemaResponse(
        action=CrisisLogisticsAction.model_json_schema(),
        observation=CrisisLogisticsObservation.model_json_schema(),
        state=CrisisLogisticsState.model_json_schema(),
    )
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
@app.get("/health", response_model=HealthResponse, tags=["Health"])
async def health() -> HealthResponse:
    """Liveness probe: always healthy while the process is serving requests."""
    return HealthResponse(status=HealthStatus.HEALTHY)
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
def main() -> None:
    """Entry point for the ``server`` console script: serve the app on port 8000."""
    # Imported lazily so importing this module (e.g. in tests) does not require uvicorn.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)


if __name__ == "__main__":
    main()
|
server/crisis_logistics_env_environment.py
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import uuid
|
| 4 |
+
from typing import List
|
| 5 |
+
|
| 6 |
+
from openenv.core.env_server import Environment
|
| 7 |
+
|
| 8 |
+
try:
|
| 9 |
+
from ..graders import EpisodeMetrics, grade_episode
|
| 10 |
+
from ..models import (
|
| 11 |
+
CrisisLogisticsAction,
|
| 12 |
+
CrisisLogisticsObservation,
|
| 13 |
+
CrisisLogisticsState,
|
| 14 |
+
)
|
| 15 |
+
from ..tasks import get_task, list_tasks
|
| 16 |
+
except ImportError:
|
| 17 |
+
from graders import EpisodeMetrics, grade_episode
|
| 18 |
+
from models import (
|
| 19 |
+
CrisisLogisticsAction,
|
| 20 |
+
CrisisLogisticsObservation,
|
| 21 |
+
CrisisLogisticsState,
|
| 22 |
+
)
|
| 23 |
+
from tasks import get_task, list_tasks
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class CrisisLogisticsEnvironment(
    Environment[CrisisLogisticsAction, CrisisLogisticsObservation, CrisisLogisticsState]
):
    """
    LogiFlow-RL: a deterministic supply-chain balancing benchmark.

    The environment exposes three benchmark tasks with increasing difficulty.
    Each episode provides a fixed shipment schedule so scores are reproducible.
    """

    def __init__(self):
        super().__init__()
        # Hubs whose utilization sits inside this band count as "optimal" steps.
        self.optimal_zone = (30.0, 70.0)
        self.available_tasks = list_tasks()
        # Initialize a valid episode immediately so /state works before any /reset.
        self.reset(task_id="easy")

    def reset(
        self, seed=None, episode_id=None, task_id: str = "easy", **kwargs
    ) -> CrisisLogisticsObservation:
        """Start a new episode of ``task_id`` and return the initial observation.

        ``seed`` is accepted for API compatibility but unused: the shipment and
        event schedules are fixed by the task, so episodes are already deterministic.
        """
        self.task = get_task(task_id)
        self.episode_id = episode_id or str(uuid.uuid4())
        # Copy mutable task data so an episode cannot mutate the shared config.
        self.hub_loads = self.task.initial_loads[:]
        self.drain_rates = self.task.drain_rates[:]
        self.step_count = 0
        self.schedule_index = 0
        self.done = False
        self.last_reward = 0.0
        self.total_reward = 0.0
        self.optimal_steps = 0
        self.bottlenecks = 0
        self.balance_gap_history: List[float] = []
        self.throughput_served = 0.0
        self.event_label = self.task.event_schedule[0]
        self.incoming_load = self.task.incoming_schedule[0]
        self.score = 0.0
        return self._get_observation(
            f"Task '{self.task.title}' initialized. Route the scheduled shipment stream."
        )

    def step(
        self, action: CrisisLogisticsAction, timeout_s=None, **kwargs
    ) -> CrisisLogisticsObservation:
        """Route the pending shipment to ``action.target_hub`` and advance one step.

        Stepping a finished episode, or passing an out-of-range hub, is a
        no-op that returns the current observation with zero reward.
        """
        if self.done:
            observation = self._get_observation("Episode already finished.")
            observation.reward = 0.0
            return observation

        selected = action.target_hub
        # Defensive re-check even though the pydantic model already bounds 0-2.
        if selected not in (0, 1, 2):
            observation = self._get_observation("Invalid hub selected.")
            observation.reward = 0.0
            return observation

        self.step_count += 1

        # Every hub that was NOT selected clears part of its backlog...
        for hub_index in range(3):
            if hub_index != selected:
                self.hub_loads[hub_index] = max(
                    0.0, self.hub_loads[hub_index] - self.drain_rates[hub_index]
                )

        # ...while the selected hub absorbs the incoming shipment.
        self.hub_loads[selected] += self.incoming_load
        self.throughput_served += self.incoming_load

        # A bottleneck is any step that leaves some hub above 100% utilization.
        overloaded = any(load > 100.0 for load in self.hub_loads)
        if overloaded:
            self.bottlenecks += 1

        reward = self._calculate_step_reward(selected)
        self.total_reward += reward
        self.last_reward = reward

        if self._is_optimal_state():
            self.optimal_steps += 1

        self.balance_gap_history.append(max(self.hub_loads) - min(self.hub_loads))

        if self.step_count >= self.task.max_steps:
            self.done = True
        else:
            # Advance to the next scheduled shipment/event pair; step_count is a
            # valid index because both schedules have exactly max_steps entries.
            self.schedule_index = self.step_count
            self.event_label = self.task.event_schedule[self.schedule_index]
            self.incoming_load = self.task.incoming_schedule[self.schedule_index]

        self.score = self._compute_score()
        observation = self._get_observation(
            f"Shipment routed to Hub {selected}. Loads: {[round(load, 1) for load in self.hub_loads]}"
        )
        observation.reward = reward
        observation.done = self.done
        return observation

    def _calculate_step_reward(self, selected: int) -> float:
        """Score the routing choice, clamped to [0, 1] and rounded to 2 decimals.

        Rewards keeping the selected hub near 50% and inside the 30-70 band;
        penalizes cross-hub imbalance and overloading past 100%.
        """
        target_load = self.hub_loads[selected]
        center = 50.0
        zone_bonus = 1.0 if 30.0 <= target_load <= 70.0 else 0.0
        load_penalty = min(abs(target_load - center) / 50.0, 1.0)
        balance_gap = max(self.hub_loads) - min(self.hub_loads)
        balance_penalty = min(balance_gap / 100.0, 1.0)
        overload_penalty = 1.0 if target_load > 100.0 else 0.0

        reward = 0.55 + 0.35 * zone_bonus - 0.25 * load_penalty - 0.20 * balance_penalty - 0.45 * overload_penalty
        return round(max(0.0, min(1.0, reward)), 2)

    def _is_optimal_state(self) -> bool:
        """Return True when every hub sits inside the optimal utilization band."""
        low, high = self.optimal_zone
        return all(low <= load <= high for load in self.hub_loads)

    def _compute_score(self) -> float:
        """Aggregate the episode counters and grade them against the task targets."""
        metrics = EpisodeMetrics(
            total_reward=self.total_reward,
            average_reward=self.total_reward / max(self.step_count, 1),
            bottlenecks=self.bottlenecks,
            optimal_steps=self.optimal_steps,
            average_balance_gap=sum(self.balance_gap_history) / max(len(self.balance_gap_history), 1),
            throughput_served=self.throughput_served,
            steps_completed=self.step_count,
        )
        return grade_episode(self.task, metrics)

    def _get_observation(self, message: str) -> CrisisLogisticsObservation:
        """Build the client-facing observation for the current internal state."""
        overloaded_hubs = sum(1 for load in self.hub_loads if load > 100.0)
        # After the final step there is no pending shipment or event to report.
        next_incoming = 0.0 if self.done else self.incoming_load
        next_event = "completed" if self.done else self.event_label
        return CrisisLogisticsObservation(
            task_id=self.task.task_id,
            difficulty=self.task.difficulty,
            objective=self.task.objective,
            hub_loads=[round(load, 2) for load in self.hub_loads],
            drain_rates=self.drain_rates[:],
            incoming_load=next_incoming,
            step_count=self.step_count,
            max_steps=self.task.max_steps,
            overloaded_hubs=overloaded_hubs,
            cumulative_score=self.score,
            last_reward=self.last_reward,
            event_label=next_event,
            message=message,
            reward=self.last_reward,
            done=self.done,
            metadata={
                "title": self.task.title,
                "available_tasks": [task.task_id for task in self.available_tasks],
                "bottlenecks": self.bottlenecks,
            },
        )

    @property
    def state(self) -> CrisisLogisticsState:
        """Snapshot of raw internal counters, served by the /state endpoint."""
        return CrisisLogisticsState(
            episode_id=self.episode_id,
            task_id=self.task.task_id,
            difficulty=self.task.difficulty,
            step_count=self.step_count,
            hub_loads=[round(load, 2) for load in self.hub_loads],
            incoming_index=self.schedule_index,
            bottlenecks=self.bottlenecks,
            score=self.score,
        )
|
| 185 |
+
|
| 186 |
+
|
| 187 |
+
def choose_balancing_action(observation: CrisisLogisticsObservation) -> int:
    """Deterministic heuristic baseline used for smoke tests and offline fallback."""

    def simulated_cost(candidate: int) -> float:
        # Mirror one environment step: non-selected hubs drain (floored at 0),
        # then the candidate hub absorbs the incoming shipment.
        loads = [
            load if hub == candidate else max(0.0, load - observation.drain_rates[hub])
            for hub, load in enumerate(observation.hub_loads)
        ]
        loads[candidate] += observation.incoming_load

        spread = max(loads) - min(loads)
        overload = 40.0 if loads[candidate] > 100.0 else 0.0
        distance_from_center = sum(abs(load - 50.0) for load in loads) / 3.0
        return overload + spread + distance_from_center

    # min() keeps the first index on ties, matching the strict-< scan it replaces.
    return min(range(3), key=simulated_cost)
|
server/requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
openenv-core[core]>=0.2.2
|
| 2 |
+
fastapi>=0.115.0
|
| 3 |
+
uvicorn>=0.24.0
|
| 4 |
+
numpy>=1.26.0
|
| 5 |
+
gymnasium>=0.29.0
|
| 6 |
+
openai>=2.0.0
|
tasks.py
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from dataclasses import dataclass
|
| 4 |
+
from typing import List
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
@dataclass(frozen=True)
class TaskConfig:
    """Immutable configuration for one LogiFlow-RL benchmark task.

    ``incoming_schedule`` and ``event_schedule`` each contain exactly
    ``max_steps`` entries, so every episode is fully deterministic.
    """

    task_id: str
    difficulty: str
    title: str
    objective: str
    max_steps: int
    initial_loads: List[float]  # starting utilization per hub
    drain_rates: List[float]  # load cleared per step by each non-selected hub
    incoming_schedule: List[float]  # shipment size arriving at each step
    event_schedule: List[str]  # traffic label shown to the agent at each step
    # Grading thresholds consumed by graders.grade_episode:
    target_bottlenecks: int
    target_balance_gap: float
    minimum_avg_reward: float
|
| 22 |
+
|
| 23 |
+
# Registry of the three deterministic benchmark tasks, keyed by task_id and
# ordered by difficulty. Both per-step schedules have exactly max_steps entries
# so every episode replays identically.
TASKS: dict[str, TaskConfig] = {
    "easy": TaskConfig(
        task_id="easy",
        difficulty="easy",
        title="Steady-State Rebalancing",
        objective="Keep all hubs in the 30-70 utilization band during predictable daytime demand.",
        max_steps=12,
        initial_loads=[28.0, 42.0, 35.0],
        drain_rates=[8.0, 7.0, 6.0],
        incoming_schedule=[9.0, 8.0, 11.0, 10.0, 9.0, 12.0, 8.0, 10.0, 9.0, 11.0, 8.0, 10.0],
        # Twelve identical entries; expanded multiplicatively instead of listed out.
        event_schedule=["normal"] * 12,
        target_bottlenecks=0,
        target_balance_gap=25.0,
        minimum_avg_reward=0.55,
    ),
    "medium": TaskConfig(
        task_id="medium",
        difficulty="medium",
        title="Flash Sale Containment",
        objective="Absorb an afternoon flash-sale surge without letting any single hub overload.",
        max_steps=14,
        initial_loads=[34.0, 48.0, 31.0],
        drain_rates=[8.0, 7.0, 6.0],
        incoming_schedule=[10.0, 12.0, 18.0, 22.0, 20.0, 16.0, 11.0, 13.0, 17.0, 14.0, 10.0, 9.0, 8.0, 10.0],
        event_schedule=[
            "normal",
            "normal",
            "flash_sale",
            "flash_sale",
            "flash_sale",
            "weather_disruption",
            "normal",
            "normal",
            "flash_sale",
            "weather_disruption",
            "normal",
            "normal",
            "normal",
            "normal",
        ],
        target_bottlenecks=0,
        target_balance_gap=28.0,
        minimum_avg_reward=0.48,
    ),
    "hard": TaskConfig(
        task_id="hard",
        difficulty="hard",
        title="Cascading Disruption Recovery",
        objective="Stabilize the network through repeated surge waves and weather disruptions while preserving throughput.",
        max_steps=16,
        initial_loads=[45.0, 57.0, 40.0],
        drain_rates=[8.0, 7.0, 6.0],
        incoming_schedule=[16.0, 22.0, 19.0, 24.0, 18.0, 20.0, 23.0, 14.0, 25.0, 17.0, 15.0, 21.0, 13.0, 18.0, 14.0, 12.0],
        event_schedule=[
            "weather_disruption",
            "flash_sale",
            "weather_disruption",
            "flash_sale",
            "normal",
            "weather_disruption",
            "flash_sale",
            "normal",
            "flash_sale",
            "weather_disruption",
            "normal",
            "flash_sale",
            "normal",
            "weather_disruption",
            "normal",
            "normal",
        ],
        target_bottlenecks=1,
        target_balance_gap=35.0,
        minimum_avg_reward=0.40,
    ),
}
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
def list_tasks() -> List[TaskConfig]:
    """Return all benchmark tasks in registration (difficulty) order.

    Derived from the TASKS registry so newly registered tasks appear
    automatically instead of being silently dropped by a hard-coded list.
    """
    return list(TASKS.values())
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
def get_task(task_id: str) -> TaskConfig:
    """Return the configuration for ``task_id``.

    Raises:
        KeyError: with the list of valid ids when ``task_id`` is unknown
            (a bare mapping lookup would surface only the missing key).
    """
    try:
        return TASKS[task_id]
    except KeyError:
        raise KeyError(
            f"Unknown task_id {task_id!r}; available tasks: {', '.join(TASKS)}"
        ) from None
|
test_engine.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
+
|
| 3 |
+
from models import CrisisLogisticsAction
|
| 4 |
+
from server.crisis_logistics_env_environment import (
|
| 5 |
+
CrisisLogisticsEnvironment,
|
| 6 |
+
choose_balancing_action,
|
| 7 |
+
)
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
async def test_run():
    """Smoke test: reset the medium task, take one heuristic step, print results.

    NOTE(review): nothing in the body is awaited; the coroutine wrapper exists
    only so the script mirrors async OpenEnv clients — plain sync code would work.
    """
    print("--- BOOTING LOGIFLOW-RL SIMULATOR ---")
    env = CrisisLogisticsEnvironment()

    print("\n[INIT] Resetting Environment...")
    obs = env.reset(task_id="medium")
    print(f"Task: {obs.task_id} ({obs.difficulty})")
    print(f"Objective: {obs.objective}")
    print(f"Hub Loads: {obs.hub_loads}")
    print(f"Incoming Load: {obs.incoming_load}")
    print(f"Event Type: {obs.event_label}")

    # Use the deterministic baseline policy for a single step.
    suggested_hub = choose_balancing_action(obs)
    print(f"\n[POLICY] Baseline sends shipment to hub {suggested_hub}...")
    obs = env.step(CrisisLogisticsAction(target_hub=suggested_hub))

    print(f"Updated Loads: {obs.hub_loads}")
    print(f"Reward Received: {obs.reward}")
    print(f"Current Score: {obs.cumulative_score}")
    print(f"Next Incoming Load: {obs.incoming_load}")
    print(f"Message: {obs.message}")


if __name__ == "__main__":
    asyncio.run(test_run())
|
train_and_evaluate.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from dataclasses import dataclass
|
| 4 |
+
|
| 5 |
+
from graders import EpisodeMetrics, grade_episode
|
| 6 |
+
from models import CrisisLogisticsAction
|
| 7 |
+
from server.crisis_logistics_env_environment import (
|
| 8 |
+
CrisisLogisticsEnvironment,
|
| 9 |
+
choose_balancing_action,
|
| 10 |
+
)
|
| 11 |
+
from tasks import list_tasks
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
@dataclass
class EpisodeSummary:
    """Compact result of one benchmark episode for the printed report table."""

    task_id: str
    total_reward: float  # sum of per-step rewards, rounded to 2 decimals
    score: float  # normalized grade from grade_episode
    bottlenecks: int  # steps where some hub exceeded 100% utilization
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def run_policy(task_id: str, policy: str) -> EpisodeSummary:
    """Roll out one full episode of ``task_id`` with the named baseline policy.

    ``policy`` is either "round_robin" (cycle hubs 0, 1, 2) or anything else,
    which falls back to the load-balancing heuristic.
    """
    env = CrisisLogisticsEnvironment()
    observation = env.reset(task_id=task_id)
    round_robin_step = 0

    while not observation.done:
        if policy == "round_robin":
            action = round_robin_step % 3
            round_robin_step += 1
        else:
            action = choose_balancing_action(observation)
        observation = env.step(CrisisLogisticsAction(target_hub=action))

    # Re-grade from the environment's accumulated counters so the summary
    # matches the score the server itself would report for this episode.
    metrics = EpisodeMetrics(
        total_reward=env.total_reward,
        average_reward=env.total_reward / max(env.step_count, 1),
        bottlenecks=env.bottlenecks,
        optimal_steps=env.optimal_steps,
        average_balance_gap=sum(env.balance_gap_history) / max(len(env.balance_gap_history), 1),
        throughput_served=env.throughput_served,
        steps_completed=env.step_count,
    )
    score = grade_episode(env.task, metrics)
    return EpisodeSummary(
        task_id=task_id,
        total_reward=round(env.total_reward, 2),
        score=score,
        bottlenecks=env.bottlenecks,
    )
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def main() -> None:
    """Benchmark both baseline policies across every task and print a table."""
    print("LogiFlow-RL Benchmarks")
    print("----------------------")
    for policy in ("round_robin", "heuristic"):
        print(f"\nPolicy: {policy}")
        scores = []
        for task in list_tasks():
            summary = run_policy(task.task_id, policy)
            scores.append(summary.score)
            print(
                f"{summary.task_id:6} | reward={summary.total_reward:6.2f} | "
                f"score={summary.score:0.3f} | bottlenecks={summary.bottlenecks}"
            )
        avg_score = sum(scores) / len(scores)
        print(f"average | score={avg_score:0.3f}")


if __name__ == "__main__":
    main()
|
uv.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|