adityss commited on
Commit
c588022
·
1 Parent(s): 0c3f011

Enhance Dockerfile and project structure for GridMind-RL

Browse files

- Updated Dockerfile to create run directories for supervisor and adjusted user permissions.
- Modified CMD to run supervisor in the foreground for better process management.
- Revised baseline_scores.json with updated task averages and overall average scores.
- Added PKG-INFO file for package metadata and dependencies.
- Created SOURCES.txt, dependency_links.txt, entry_points.txt, requires.txt, and top_level.txt for package structure.
- Implemented FastAPI server in app.py to proxy requests to the Go environment server.
- Updated requirements.txt and pyproject.toml to include necessary dependencies and project metadata.
- Added __init__.py files for Python package structure.

Dockerfile CHANGED
@@ -49,12 +49,17 @@ RUN echo "[supervisord]" > /etc/supervisor/conf.d/supervisord.conf && \
49
  echo "stderr_logfile=/dev/stderr" >> /etc/supervisor/conf.d/supervisord.conf && \
50
  echo "stderr_logfile_maxbytes=0" >> /etc/supervisor/conf.d/supervisord.conf
51
 
 
 
 
 
52
  # Add a non-root user (good practice and required for some HF Spaces configs)
53
- RUN useradd -m -u 1000 user
54
- RUN chown -R user:user /app
55
- USER user
56
 
57
  # 7860 = Env Server (main OpenEnv endpoint), 7861 = Dashboard
58
  EXPOSE 7860 7861
59
 
60
- CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
 
 
49
  echo "stderr_logfile=/dev/stderr" >> /etc/supervisor/conf.d/supervisord.conf && \
50
  echo "stderr_logfile_maxbytes=0" >> /etc/supervisor/conf.d/supervisord.conf
51
 
52
+ # Create run directory for supervisor
53
+ RUN mkdir -p /var/run/supervisor /var/log/supervisor && \
54
+ chmod 755 /var/run/supervisor /var/log/supervisor
55
+
56
  # Add a non-root user (good practice and required for some HF Spaces configs)
57
+ RUN useradd -m -u 1000 user && \
58
+ chown -R user:user /app && \
59
+ chown -R user:user /var/run/supervisor /var/log/supervisor
60
 
61
  # 7860 = Env Server (main OpenEnv endpoint), 7861 = Dashboard
62
  EXPOSE 7860 7861
63
 
64
+ # Run supervisor as root to manage both services (required for multi-process supervision)
65
+ CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf", "-n"]
baseline_scores.json CHANGED
@@ -7,33 +7,33 @@
7
  "llm_every": 4,
8
  "max_steps": null,
9
  "task_averages": {
10
- "1": 0.7063,
11
- "2": 0.6333,
12
- "3": 0.5966
13
  },
14
- "overall_average": 0.6454,
15
  "all_results": [
16
  {
17
  "task_id": 1,
18
  "seed": 1100,
19
- "total_reward": 251.40178983938813,
20
- "total_steps": 96,
21
- "elapsed_sec": 1.2027783393859863,
22
- "score": 0.7063,
23
  "sub_scores": {
24
- "cost": 0.7063441549865395
25
  },
26
  "exploit_detected": false
27
  },
28
  {
29
  "task_id": 2,
30
  "seed": 1200,
31
- "total_reward": 246.40262234598185,
32
- "total_steps": 96,
33
- "elapsed_sec": 1.2154290676116943,
34
- "score": 0.6333,
35
  "sub_scores": {
36
- "cost": 0.7014155357169216,
37
  "temperature": 0.53125
38
  },
39
  "exploit_detected": false
@@ -41,16 +41,16 @@
41
  {
42
  "task_id": 3,
43
  "seed": 1300,
44
- "total_reward": 255.60231973463087,
45
- "total_steps": 96,
46
- "elapsed_sec": 1.2423679828643799,
47
- "score": 0.5966,
48
  "sub_scores": {
49
  "batch_deadline": 1,
50
- "carbon": 0.6574530318382599,
51
- "cost": 0.670084941969173,
52
  "grid_response": 0.21428571428571427,
53
- "temperature": 0.5729166666666666
54
  },
55
  "exploit_detected": false
56
  }
 
7
  "llm_every": 4,
8
  "max_steps": null,
9
  "task_averages": {
10
+ "1": 0.708,
11
+ "2": 0.6328,
12
+ "3": 0.5983
13
  },
14
+ "overall_average": 0.6463666666666666,
15
  "all_results": [
16
  {
17
  "task_id": 1,
18
  "seed": 1100,
19
+ "total_reward": 246.42219784256966,
20
+ "total_steps": 94,
21
+ "elapsed_sec": 1.5613129138946533,
22
+ "score": 0.708,
23
  "sub_scores": {
24
+ "cost": 0.7079636116620143
25
  },
26
  "exploit_detected": false
27
  },
28
  {
29
  "task_id": 2,
30
  "seed": 1200,
31
+ "total_reward": 242.81120610868118,
32
+ "total_steps": 95,
33
+ "elapsed_sec": 1.594855785369873,
34
+ "score": 0.6328,
35
  "sub_scores": {
36
+ "cost": 0.7005224090103834,
37
  "temperature": 0.53125
38
  },
39
  "exploit_detected": false
 
41
  {
42
  "task_id": 3,
43
  "seed": 1300,
44
+ "total_reward": 251.7133773862143,
45
+ "total_steps": 94,
46
+ "elapsed_sec": 1.6321852207183838,
47
+ "score": 0.5983,
48
  "sub_scores": {
49
  "batch_deadline": 1,
50
+ "carbon": 0.6563888726735232,
51
+ "cost": 0.6695079035324871,
52
  "grid_response": 0.21428571428571427,
53
+ "temperature": 0.5833333333333334
54
  },
55
  "exploit_detected": false
56
  }
gridmind_rl.egg-info/PKG-INFO ADDED
@@ -0,0 +1,667 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Metadata-Version: 2.4
2
+ Name: gridmind-rl
3
+ Version: 1.0.0
4
+ Summary: GridMind-RL: Industrial Load-Shaping and Demand-Response RL Environment. Control HVAC, thermal storage, and batch job scheduling under stochastic electricity prices and grid stress events.
5
+ Author: LOKyu Team
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/meta-pytorch/OpenEnv
8
+ Project-URL: Repository, https://github.com/meta-pytorch/OpenEnv
9
+ Project-URL: Documentation, https://github.com/meta-pytorch/OpenEnv
10
+ Keywords: reinforcement-learning,openenv,energy-management,demand-response
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Environment :: GPU
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Natural Language :: English
16
+ Classifier: Operating System :: OS Independent
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.9
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
23
+ Requires-Python: >=3.9
24
+ Description-Content-Type: text/markdown
25
+ License-File: LICENSE
26
+ Requires-Dist: openai>=1.0.0
27
+ Requires-Dist: openenv-core>=0.2.0
28
+ Requires-Dist: fastapi>=0.100.0
29
+ Requires-Dist: uvicorn>=0.23.0
30
+ Requires-Dist: pydantic>=2.0.0
31
+ Requires-Dist: requests>=2.31.0
32
+ Requires-Dist: httpx>=0.24.0
33
+ Requires-Dist: pytest>=7.0.0
34
+ Requires-Dist: python-dotenv>=1.0.0
35
+ Provides-Extra: dev
36
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
37
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
38
+ Requires-Dist: black>=23.0.0; extra == "dev"
39
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
40
+ Dynamic: license-file
41
+
42
+ # 🏢 GridMind-RL — Energy Management Reinforcement Learning Environment
43
+
44
+ **A real-world RL environment for intelligent building energy optimization.** Control HVAC systems, thermal storage, batch job scheduling, and demand-response under stochastic electricity prices and grid stress events.
45
+
46
+ Built on the [OpenEnv](https://github.com/meta-pytorch/OpenEnv) specification. Containerized. Ready for Hugging Face Spaces deployment.
47
+
48
+ ---
49
+
50
+ ## 📖 Overview & Motivation
51
+
52
+ Building energy management is a **real-world optimization problem** facing utilities, facility operators, and industrial sites globally. Traditional rule-based controls waste billions in energy costs and miss opportunities for grid participation.
53
+
54
+ **GridMind-RL** simulates decisions that facility operators must make daily:
55
+
56
+ - **Cost Optimization** — Buy electricity when prices are low, avoid peak surcharges
57
+ - **Comfort & Safety** — Maintain indoor temperature within acceptable ranges while managing thermal inertia
58
+ - **Grid Participation** — Respond to demand-response signals and grid stress events
59
+ - **Batch Scheduling** — Coordinate industrial process timings to meet deadlines and minimize energy cost
60
+ - **Carbon Minimization** — Shift consumption to periods when grid carbon intensity is low
61
+
62
+ **Why this matters:** An RL agent trained in this environment can learn strategies that would be difficult or impossible for humans to hand-craft. The combination of continuous control (HVAC power, thermal storage), discrete decisions (batch scheduling), and multiple simultaneous objectives (cost, comfort, grid, deadlines, carbon) creates a realistic, challenging benchmark.
63
+
64
+ **Episode Length:** 96 steps = 24 hours at 15-minute resolution. A complete episode requires strategic decision-making across a full day-night cycle.
65
+
66
+ ---
67
+
68
+ ## � Observation Space
69
+
70
+ At each timestep, the environment provides the following observations. **Episode length: 96 steps** (15-minute intervals = 24 hours).
71
+
72
+ | Field | Data Type | Range / Values | Description |
73
+ |-------|-----------|-----------------|-------------|
74
+ | `indoor_temperature` | float | 10–40 °C | Current building interior temperature |
75
+ | `thermal_storage_level` | float | 0.0–1.0 | Thermal tank charge state (0 = empty, 1 = full) |
76
+ | `process_demand` | float | ≥ 0 kW | Current industrial batch process power draw |
77
+ | `current_price` | float | > 0 $/kWh | Real-time spot electricity price |
78
+ | `grid_stress_signal` | float | 0.0–1.0 | Utility demand-response urgency (0.7+ = critical) |
79
+ | `carbon_intensity` | float | ≥ 0 gCO₂/kWh | Current grid carbon intensity |
80
+ | `hour_of_day` | int | 0–23 | Time-of-day context |
81
+ | `batch_queue` | int array | — | Pending batch jobs with deadline slots |
82
+ | `cumulative_cost` | float | ≥ 0 $ | Energy cost accumulated in current episode so far |
83
+ | `step` | int | 0–95 | Current timestep (96 total = 24 hours) |
84
+ | `building_id` | int | 0+ | Building identifier (for multi-building scenarios) |
85
+
86
+ **Observation Properties:**
87
+ - Observations are **deterministic** given the seed — same seed produces identical sequences
88
+ - All fields are **normalized or bounded** for stable learning
89
+ - Prices follow realistic time-of-use patterns; carbon intensity varies with grid mix
90
+ - Batch queue starts empty; jobs appear stochastically based on the task/seed
91
+
92
+ ---
93
+
94
+ ## 🎮 Action Space
95
+
96
+ At each step, the agent sends an action controlling four independent subsystems:
97
+
98
+ | Field | Data Type | Range | Description |
99
+ |-------|-----------|-------|-------------|
100
+ | `hvac_power_level` | float | 0.0–1.0 | HVAC system power (0 = off, 1 = full) |
101
+ | `thermal_charge_rate` | float | -1.0–1.0 | Thermal storage control (+charge, -discharge) |
102
+ | `batch_job_slot` | int | 0–4 | Schedule next batch job: 0=immediate, 1–4=defer |
103
+ | `load_shed_fraction` | float | 0.0–0.5 | Non-critical load reduction (0–50%) for demand-response |
104
+ | `building_id` | int | 0+ | Building identifier (routing) |
105
+
106
+ **Action Space Properties:**
107
+ - **Continuous** (HVAC, thermal charging, load shedding) + **discrete** (batch scheduling) → hybrid control
108
+ - Actions are applied every 15-minute step
109
+ - Load shedding is capped at 50% to ensure safety/habitability
110
+ - Batch scheduling decisions affect energy cost and deadline compliance
111
+
112
+ ---
113
+
114
+ ## 💡 Reward Function
115
+
116
+ The environment provides **dense rewards every step** (not sparse, not binary). Each step returns:
117
+ - A scalar reward (sum of components)
118
+ - A dictionary of 7 weighted sub-components for transparency
119
+
120
+ | Component | Purpose | Possible Values |
121
+ |-----------|---------|-----------------|
122
+ | **cost_savings** | Minimize energy bill | Negative (cost increases) to positive (savings vs baseline) |
123
+ | **temp_constraint** | Maintain comfort | Gaussian bonus near 21°C, penalty outside 19–23°C bounds |
124
+ | **grid_response** | Shift load during stress | Bonus proportional to shed fraction when grid signal > 0.7 |
125
+ | **efficiency_bonus** | Exploit thermal storage | Reward charge/discharge timing and thermal arbitrage |
126
+ | **stability_penalty** | Smooth control | Small penalty for rapid oscillations in HVAC/storage |
127
+ | **deadline_penalty** | Meet job deadlines | Large penalty if batch job finishes after deadline |
128
+ | **carbon_reward** | Low-carbon consumption | Bonus for consuming during low-carbon grid periods |
129
+
130
+ **Example Reward Calculation:**
131
+ If an agent takes a well-timed action during high-price, high-stress period:
132
+ - Large positive `cost_savings` (avoided expensive hour)
133
+ - Positive `grid_response` (shed load successfully)
134
+ - Possible positive `carbon_reward` (if grid is clean)
135
+ - **Total step reward** = weighted sum of all components
136
+
137
+ This multi-objective reward structure encourages **learning tradeoffs** between cost, comfort, grid support, and carbon efficiency.
138
+
139
+ ---
140
+
141
+ ---
142
+
143
+ ## 📋 Tasks & Difficulty Levels
144
+
145
+ Three independent tasks with **deterministic programmatic graders**. Scores range **0.0–1.0**; higher is better.
146
+
147
+ ### Task 1 — Cost Minimization (🟢 Easy)
148
+
149
+ **Objective:** Minimize total energy cost in 24 hours with no other constraints.
150
+
151
+ **Difficulty Rationale:** Only one objective (cost) to optimize; temperature and grid constraints are relaxed.
152
+
153
+ **Grader Metrics:**
154
+ - **Cost score (100%)** — Compares total episode energy cost to a deterministic baseline. Higher savings → higher score.
155
+
156
+ **Baseline Score:** **0.7063**
157
+
158
+ ---
159
+
160
+ ### Task 2 — Constrained Temperature Control (🟡 Medium)
161
+
162
+ **Objective:** Minimize cost while maintaining indoor temperature between **19–23°C** throughout the episode.
163
+
164
+ **Difficulty Rationale:** Introduces a hard constraint (temperature bounds). Agent must use thermal storage strategically to meet both cost and comfort goals.
165
+
166
+ **Grader Metrics:**
167
+ - **Cost score (60%)** — Total energy cost vs baseline
168
+ - **Temperature score (40%)** — Fraction of steps within bounds (hard penalty for violations)
169
+
170
+ **Notes:** A naive agent might achieve low cost by disabling HVAC, but then temperatures drift out of bounds (0 score). Trade-off learning is required.
171
+
172
+ **Baseline Score:** **0.6333**
173
+
174
+ ---
175
+
176
+ ### Task 3 — Full Demand Response (🔴 Hard)
177
+
178
+ **Objective:** Minimize cost, maintain temperature, respond to grid events, complete batch jobs on time, and minimize carbon emissions. This is a **multi-objective constraint satisfaction** problem.
179
+
180
+ **Difficulty Rationale:** Most realistic. Agent must balance five competing objectives simultaneously; any single failure is costly.
181
+
182
+ **Grader Metrics:**
183
+ - **Cost score (28%)** — Energy cost
184
+ - **Temperature score (20%)** — Time within comfort bounds
185
+ - **Grid response score (20%)** — Load shed during demand-response events (signal > 0.7)
186
+ - **Batch deadline score (12%)** — Fraction of jobs completed before deadline
187
+ - **Carbon reward score (20%)** — Shift load to low-carbon periods
188
+
189
+ **Baseline Breakdown:**
190
+ - Cost: 0.670, Temperature: 0.573, Grid: 0.214, Batch: 1.000, Carbon: 0.657
191
+ - **Overall: 0.5966**
192
+
193
+ **Challenge:** Grid response score (~0.21) shows that the baseline heuristic rarely sheds load opportunistically. Learning agents should discover that quick load shedding during high-price, high-stress periods yields significant cost savings.
194
+
195
+ **Grader Determinism:** Same seed always produces identical evaluations. Episodes are seeded internally; reproducible batches of evaluations can be generated for benchmark comparisons.
196
+
197
+ ---
198
+
199
+ ## 🚀 Setup & Usage
200
+
201
+ ### Prerequisites
202
+
203
+ - **Docker** — [Download Docker Desktop](https://www.docker.com/products/docker-desktop/)
204
+ - **Python 3.10+** — [Download Python](https://www.python.org/downloads/)
205
+ - **Git** — [Download Git](https://git-scm.com/downloads)
206
+
207
+ ### Quick Start (5 minutes)
208
+
209
+ #### 1. Clone the Repository
210
+
211
+ ```bash
212
+ git clone https://github.com/LO-Kyu/gridmind-rl.git
213
+ cd gridmind-rl
214
+ ```
215
+
216
+ #### 2. Build and Start the Environment Server
217
+
218
+ ```bash
219
+ docker build -t gridmind-rl .
220
+ docker run --rm -d -p 7860:7860 -p 7861:7861 --name gridmind gridmind-rl
221
+ ```
222
+
223
+ Verify the server is running:
224
+
225
+ ```bash
226
+ # Check health endpoint
227
+ curl http://localhost:7860/health
228
+ # Expected: {"status":"ok","version":"1.0.0"}
229
+ ```
230
+
231
+ #### 3. Install Python Dependencies
232
+
233
+ Open a **new terminal** and install:
234
+
235
+ ```bash
236
+ pip install -r python/requirements.txt
237
+ ```
238
+
239
+ #### 4. Run Inference (No LLM — Fast)
240
+
241
+ Run a fast, deterministic baseline using heuristic policy:
242
+
243
+ ```bash
244
+ python inference.py --fast-mode --episodes 1
245
+ ```
246
+
247
+ Expected output (sample):
248
+ ```
249
+ [START] task=Cost_Minimization env=gridmind model=heuristic
250
+ [STEP1] step=1 action={...} reward=10.5 done=false
251
+ [STEP2] step=2 action={...} reward=12.3 done=false
252
+ ...
253
+ [STEP96] step=96 action={...} reward=8.9 done=true
254
+ [END] success=true steps=96 rewards=[10.5, 12.3, ..., 8.9]
255
+ ```
256
+
257
+ Results saved to: `baseline_scores.json`
258
+
259
+ #### 5. (Optional) Run with LLM
260
+
261
+ To use an LLM agent for decision-making:
262
+
263
+ 1. Get a **free API key** from [openrouter.ai/keys](https://openrouter.ai/keys) (no credit card needed)
264
+ 2. Create `.env` file (copy from `.env.example`):
265
+ ```bash
266
+ cp .env.example .env
267
+ ```
268
+ 3. Edit `.env` and add your API key:
269
+ ```env
270
+ HF_TOKEN=sk-or-v1-your-key-here
271
+ # or
272
+ OPENAI_API_KEY=sk-or-v1-your-key-here
273
+ ```
274
+ 4. Run with LLM:
275
+ ```bash
276
+ python inference.py --episodes 1
277
+ ```
278
+
279
+ #### 6. Stop the Server (When Done)
280
+
281
+ ```bash
282
+ docker stop gridmind
283
+ ```
284
+
285
+ ---
286
+
287
+ ### Inference Script Reference
288
+
289
+ The `inference.py` script (project root) is the **hackathon submission entrypoint**.
290
+
291
+ **Environment Variables:**
292
+
293
+ | Variable | Default | Description |
294
+ |----------|---------|-------------|
295
+ | `HF_TOKEN` | (required for submission) | API key for LLM provider or HF Spaces |
296
+ | `OPENAI_API_KEY` | (optional fallback) | Alternative OpenAI-compatible key |
297
+ | `API_BASE_URL` | `https://openrouter.ai/api/v1` | LLM endpoint URL |
298
+ | `MODEL_NAME` | `meta-llama/llama-3.3-70b-instruct:free` | Model identifier |
299
+ | `ENV_URL` | `http://localhost:7860` | Environment server address |
300
+
301
+ **Command-Line Flags:**
302
+
303
+ | Flag | Default | Description |
304
+ |------|---------|-------------|
305
+ | `--episodes N` | 1 | Episodes per task (runs tasks 1, 2, 3 in sequence) |
306
+ | `--fast-mode` | off | Don't call LLM; use heuristic policy only (reproducible, no API calls) |
307
+ | `--llm-every N` | 4 | Reuse each LLM decision for N steps (reduces API calls) |
308
+ | `--max-steps N` | 96 | Stop episode early after N steps |
309
+ | `--env-url URL` | from env var | Override environment server URL |
310
+ | `--output FILE` | `baseline_scores.json` | Output results filename |
311
+ | `--verbose` | off | Print detailed logs for each step |
312
+
313
+ **Examples:**
314
+
315
+ ```bash
316
+ # Run all 3 tasks with LLM (1 episode each)
317
+ python inference.py --episodes 1
318
+
319
+ # Reproduce baseline fast (no LLM)
320
+ python inference.py --fast-mode --episodes 1
321
+
322
+ # Only Task 2, heuristic, verbose output
323
+ python inference.py --fast-mode --episodes 1 --verbose
324
+
325
+ # Run 5 episodes per task with custom environment
326
+ python inference.py --episodes 5 --env-url http://my-server:7860
327
+ ```
328
+
329
+ ---
330
+
331
+ ### HTTP API Reference
332
+
333
+ **Base URL:** `http://localhost:7860`
334
+
335
+ | Endpoint | Method | Purpose | Example Body |
336
+ |----------|--------|---------|---------------|
337
+ | `/health` | GET | Liveness check | — |
338
+ | `/ping` | GET | Lightweight ping | — |
339
+ | `/reset` | POST | Reset episode for a task | `{"task_id": 1, "seed": 42}` |
340
+ | `/step` | POST | Apply action, get next observation | `{"hvac_power_level": 0.5, "thermal_charge_rate": 0.1, ...}` |
341
+ | `/state` | GET | Current full state snapshot | — |
342
+ | `/grade` | GET | Episode score (0.0–1.0) with sub-scores | — |
343
+ | `/replay` | GET | Full step-by-step trajectory | — |
344
+ | `/tasks` | GET | Task definitions and grader weights | — |
345
+ | `/metrics` | GET | Prometheus-format metrics | — |
346
+
347
+ **Example Workflow:**
348
+
349
+ ```bash
350
+ # 1. Reset to Task 1 with seed 42
351
+ curl -X POST http://localhost:7860/reset \
352
+ -H "Content-Type: application/json" \
353
+ -d '{"task_id": 1, "seed": 42}'
354
+
355
+ # 2. Get initial observation
356
+ curl http://localhost:7860/state
357
+
358
+ # 3. Take an action
359
+ curl -X POST http://localhost:7860/step \
360
+ -H "Content-Type: application/json" \
361
+ -d '{
362
+ "hvac_power_level": 0.5,
363
+ "thermal_charge_rate": 0.1,
364
+ "batch_job_slot": 1,
365
+ "load_shed_fraction": 0.0
366
+ }'
367
+
368
+ # 4. Check final score after episode completes
369
+ curl http://localhost:7860/grade
370
+ ```
371
+
372
+ ---
373
+
374
+ ## 📊 Baseline Performance Scores
375
+
376
+ The baseline is a **heuristic policy** (rule-based, no LLM) representing a reasonable but non-optimized control strategy. Your RL agent should aim to exceed these scores.
377
+
378
+ **Baseline Run:** `python inference.py --fast-mode --episodes 1`
379
+
380
+ ### Summary Scores
381
+
382
+ | Task | Difficulty | Score | Model |
383
+ |------|:----------:|:-----:|-------|
384
+ | Task 1 — Cost Minimization | 🟢 Easy | **0.7063** | Heuristic |
385
+ | Task 2 — Temperature Control | 🟡 Medium | **0.6333** | Heuristic |
386
+ | Task 3 — Full Demand Response | 🔴 Hard | **0.5966** | Heuristic |
387
+ | **Overall Average** | — | **0.6454** | Heuristic |
388
+
389
+ ### Detailed Breakdown
390
+
391
+ #### Task 1 Results
392
+ - **Task:** Cost minimization (96 hours × 15 min = 24 hours)
393
+ - **Score:** 0.7063
394
+ - **Sub-score:** Cost = 0.706
395
+ - **Interpretation:** Heuristic achieves ~70% of optimal cost reduction vs baseline
396
+
397
+ #### Task 2 Results
398
+ - **Task:** Minimize cost while maintaining temperature 19–23°C
399
+ - **Score:** 0.6333
400
+ - **Sub-scores:**
401
+ - Cost: 0.701
402
+ - Temperature constraint: 0.531 (agent violated comfort bounds ~47% of the time)
403
+ - **Interpretation:** Temperature management is challenging for the heuristic. Tighter thermal control could improve this score significantly.
404
+
405
+ #### Task 3 Results (Most Interesting)
406
+ - **Task:** Multi-objective: cost, temperature, grid response, batch deadlines, carbon
407
+ - **Score:** 0.5966
408
+ - **Sub-scores:**
409
+ - Cost: 0.670
410
+ - Temperature: 0.573 (similar temperature control challenge as Task 2)
411
+ - **Grid response: 0.214** ← Heuristic rarely participates in demand-response
412
+ - Batch deadline: 1.000 (heuristic always completes jobs on time)
413
+ - Carbon: 0.657
414
+
415
+ **Key Insight:** The heuristic's low grid response score (0.21) suggests that learned agents have significant room for improvement by:
416
+ 1. Recognizing high-price + high-stress periods
417
+ 2. Proactively shedding load to reduce cost
418
+ 3. Using thermal storage to recover comfort afterward
419
+
420
+ This multi-objective setting is where RL agents typically exceed heuristic baselines.
421
+
422
+ ### Reproducibility & Evaluation
423
+
424
+ - **Deterministic:** Baseline scores are **deterministic** — same seed always produces identical actions and rewards
425
+ - **Seeding:** Each task uses a fixed base seed (1100, 1200, 1300) for reproducible evaluation
426
+ - **Your Submissions:** Your agent will be evaluated on the same seed distribution; compare your scores directly to baseline
427
+
428
+ ---
429
+
430
+ ## 🏗️ Architecture
431
+
432
+ ```
433
+ ┌─────────────────────────────────────────────────────────────────┐
434
+ │ inference.py (LLM Agent or Heuristic) │
435
+ │ │ │
436
+ │ │ HTTP: POST /reset, /step · GET /grade, /state │
437
+ │ ▼ │
438
+ │ ┌───────────────────────────────────────────────────────────┐ │
439
+ │ │ Docker Container │ │
440
+ │ │ │ │
441
+ │ │ ┌─────────────────────┐ ┌───────────────────────────┐ │ │
442
+ │ │ │ Go Environment │ │ Python Dashboard │ │ │
443
+ │ │ │ Server (:7860) │ │ FastAPI + UI (:7861) │ │ │
444
+ │ │ │ │ │ │ │ │
445
+ │ │ │ • Physics engine │ │ • Proxies /api → :7860 │ │ │
446
+ │ │ │ • Reward function │◄──│ • Real-time charts │ │ │
447
+ │ │ │ • Task graders │ │ • State visualization │ │ │
448
+ │ │ └─────────────────────┘ └───────────────────────────┘ │ │
449
+ │ │ │ │
450
+ │ │ Isolated · Reproducible · Non-root user │ │
451
+ │ └───────────────────────────────────────────────────────────┘ │
452
+ └─────────────────────────────────────────────────────────────────┘
453
+ ```
454
+
455
+ ### Project Structure
456
+
457
+ ```
458
+ gridmind/
459
+ ├── inference.py ← Hackathon entrypoint (root)
460
+ ├── openenv.yaml ← OpenEnv spec manifest
461
+ ├── Dockerfile ← Multi-stage build (Go + Python)
462
+ ├── .env ← API credentials (git-ignored)
463
+ ├── baseline_scores.json ← Produced by inference.py
464
+
465
+ ├── main.go ← HTTP server (routes, middleware, metrics)
466
+ ├── env/ ← Core environment logic (Go)
467
+ │ ├── environment.go ← Simulation: physics, thermal dynamics
468
+ │ ├── models.go ← All data types (Observation, Action, etc.)
469
+ │ ├── rewards.go ← 7-component dense reward function
470
+ │ └── tasks.go ← 3 task definitions + deterministic graders
471
+
472
+ ├── python/ ← Python support layer
473
+ │ ├── inference.py ← Full LLM agent + heuristic fallback
474
+ │ ├── models.py ← Typed Pydantic models (mirrors Go structs)
475
+ │ ├── validate.py ← OpenEnv spec validation suite
476
+ │ └── requirements.txt ← Python dependencies
477
+
478
+ ├── tests/ ← Automated tests
479
+ │ ├── environment_test.go ← Go unit tests (determinism, bounds, etc.)
480
+ │ └── test_graders.py ← Python grader tests (pytest)
481
+
482
+ └── dashboard/ ← Optional web dashboard
483
+ ├── server.py ← FastAPI server
484
+ └── static/ ← Frontend assets
485
+ ```
486
+
487
+ ---
488
+
489
+ ## 🐳 Docker
490
+
491
+ | Action | Command |
492
+ |--------|---------|
493
+ | **Build** | `docker build -t gridmind-rl .` |
494
+ | **Run (foreground)** | `docker run --rm -p 7860:7860 -p 7861:7861 --name gridmind gridmind-rl` |
495
+ | **Run (background)** | `docker run --rm -d -p 7860:7860 -p 7861:7861 --name gridmind gridmind-rl` |
496
+ | **Stop** | `docker stop gridmind` |
497
+ | **Run inference inside container** | `docker exec -it gridmind python /app/inference.py --fast-mode` |
498
+
499
+ The Dockerfile uses a **multi-stage build**:
500
+ 1. **Stage 1** — Go 1.21 Alpine: compiles the environment server binary
501
+ 2. **Stage 2** — Python 3.11 slim: runs the Go binary + Python dashboard via Supervisor
502
+
503
+ ---
504
+
505
+ ## ☁️ Hugging Face Space Deployment
506
+
507
+ ### 1. Create a New Space
508
+
509
+ Go to [huggingface.co/new-space](https://huggingface.co/new-space):
510
+ - **SDK:** Docker
511
+ - **Hardware:** CPU Basic (2 vCPU, 16 GB — free tier)
512
+
513
+ ### 2. Push to HF
514
+
515
+ ```bash
516
+ git remote add hf https://huggingface.co/spaces/YOUR_USERNAME/gridmind-rl
517
+ git push hf main
518
+ ```
519
+
520
+ ### 3. Verify
521
+
522
+ ```bash
523
+ curl https://YOUR_USERNAME-gridmind-rl.hf.space/health
524
+ # → {"status":"ok","version":"1.0.0"}
525
+
526
+ curl -X POST https://YOUR_USERNAME-gridmind-rl.hf.space/reset \
527
+ -H "Content-Type: application/json" \
528
+ -d '{"task_id":1,"seed":42}'
529
+ ```
530
+
531
+ > **Note:** HF Spaces exposes port **7860** publicly. The dashboard (7861) is for local development only.
532
+
533
+ ---
534
+
535
+ ## 🧪 Testing
536
+
537
+ ### Run Go Unit Tests
538
+
539
+ ```bash
540
+ cd gridmind
541
+ go test ./tests/ -v
542
+ ```
543
+
544
+ ### Run Python Grader Tests (requires server running)
545
+
546
+ ```bash
547
+ pytest tests/test_graders.py -v
548
+ ```
549
+
550
+ ### Run Full OpenEnv Validation
551
+
552
+ ```bash
553
+ python python/validate.py --env-url http://localhost:7860
554
+ ```
555
+
556
+ ---
557
+
558
+ ## 📝 Inference Script Reference
559
+
560
+ The `inference.py` script at the project root is the **hackathon entrypoint**.
561
+
562
+ ### Environment Variables
563
+
564
+ | Variable | Default | Description |
565
+ |----------|---------|-------------|
566
+ | `API_BASE_URL` | `https://openrouter.ai/api/v1` | LLM API endpoint |
567
+ | `MODEL_NAME` | `meta-llama/llama-3.1-8b-instruct:free` | Model to use |
568
+ | `OPENAI_API_KEY` | — | API key (any OpenAI-compatible provider) |
569
+ | `ENV_URL` | `http://localhost:7860` | Environment server URL |
570
+
571
+ ### Command-Line Flags
572
+
573
+ | Flag | Default | Description |
574
+ |------|---------|-------------|
575
+ | `--episodes N` | 1 | Episodes per task (tasks 1–3 run in sequence) |
576
+ | `--fast-mode` | off | Use heuristic policy only (no LLM, fully reproducible) |
577
+ | `--llm-every N` | 4 | Reuse each LLM action for N steps (reduces API calls) |
578
+ | `--max-steps N` | 96 | Stop early after N steps |
579
+ | `--env-url URL` | from env | Override environment URL |
580
+ | `--output FILE` | `baseline_scores.json` | Output results file |
581
+ | `--verbose` | off | Print detailed step logs |
582
+
583
+ ### Stdout Log Format
584
+
585
+ Each episode emits structured markers for automated evaluation:
586
+
587
+ ```
588
+ [START]
589
+ [STEP1]
590
+ [STEP2]
591
+ ...
592
+ [STEP96]
593
+ [END]
594
+ ```
595
+
596
+ ---
597
+
598
+ ## ✅ OpenEnv Specification Compliance
599
+
600
+ GridMind-RL fully implements the OpenEnv specification for standardized RL environments. All components are present and tested:
601
+
602
+ | Requirement | Status | Notes |
603
+ |-------------|:------:|-------|
604
+ | Manifest (`openenv.yaml`) | ✅ | All metadata, schema definitions, and version info |
605
+ | Observation Schema | ✅ | 11-field object: temperature, storage, price, grid signal, carbon, hour, batch queue, cost, step, building_id |
606
+ | Action Schema | ✅ | 5-field object: HVAC, thermal rate, batch slot, load shed, building_id |
607
+ | HTTP Endpoints | ✅ | `/reset`, `/step`, `/state`, `/grade`, `/replay`, `/tasks`, `/health`, `/metrics` |
608
+ | Determinism | ✅ | Seeded episode generation; identical seeds produce identical trajectories |
609
+ | Typed Models | ✅ | Pydantic models (Python) mirror Go structs exactly |
610
+ | Dense Rewards | ✅ | 7-component reward breakdown every step |
611
+ | Graders | ✅ | 3 tasks with programmatic, deterministic graders (0.0–1.0 range) |
612
+ | Exploit Detection | ✅ | Built into grading pipeline to flag unrealistic scores |
613
+
614
+ ---
615
+
616
+ ## ❓ FAQ
617
+
618
+ **Q: Can I use a different model?**
619
+ A: Yes. Set `MODEL_NAME` environment variable to any OpenAI-compatible model. The default (`meta-llama/llama-3.3-70b-instruct:free`) is free on OpenRouter with no credit card.
620
+
621
+ **Q: How do I avoid rate limiting?**
622
+ A: (1) Use `--fast-mode` for local testing (no API calls), (2) Set `--llm-every 4` to reuse decisions, (3) Use a paid API tier for submission, or (4) Train & submit an offline policy.
623
+
624
+ **Q: Will my API key be exposed in submissions?**
625
+ A: No. Store your API key in `.env` (git-ignored). On HF Spaces, set secrets via the Space settings UI; keys are never committed to the repo.
626
+
627
+ **Q: What's the difference between `HF_TOKEN` and `OPENAI_API_KEY`?**
628
+ A: `HF_TOKEN` is used in HF Space deployments and external evaluations. `OPENAI_API_KEY` is a fallback for local development. The code tries `HF_TOKEN` first, then `OPENAI_API_KEY`. At least one must be set.
629
+
630
+ **Q: Can I submit an offline/trained policy?**
631
+ A: Yes. Modify `python/inference.py` to use your trained agent instead of LLM calls. Ensure you still output the required `[START]`, `[STEP]`, `[END]` format.
632
+
633
+ **Q: What if my submission times out?**
634
+ A: Each episode is 96 steps. The environment runs 3 episodes (one per task). Optimize for latency: reduce LLM calls (use `--llm-every`), use a faster model, or submit a heuristic/trained offline policy.
635
+
636
+ ---
637
+
638
+ ## 🎯 Submission Checklist
639
+
640
+ Before submitting, verify:
641
+
642
+ - [ ] Clone repo, build Docker, run `docker run -p 7860:7860 -p 7861:7861 gridmind-rl`
643
+ - [ ] Run `python inference.py --fast-mode --episodes 1` locally — should produce `baseline_scores.json`
644
+ - [ ] Check `[START]`, `[STEP]`, `[END]` markers in stdout
645
+ - [ ] Set `HF_TOKEN` or `OPENAI_API_KEY` in `.env` for LLM runs
646
+ - [ ] Test with LLM: `python inference.py --episodes 1`
647
+ - [ ] Verify Dockerfile builds without errors: `docker build -t gridmind-rl .`
648
+ - [ ] Create HF Space (Docker SDK, CPU Basic)
649
+ - [ ] Push repo to HF Space: `git push hf main`
650
+ - [ ] Set secrets in HF Space UI: `HF_TOKEN`, `API_BASE_URL` (optional), `MODEL_NAME` (optional)
651
+ - [ ] Verify Space is running: `curl https://YOUR_USERNAME-gridmind-rl.hf.space/health`
652
+ - [ ] Submit Space URL to hackathon organizers
653
+
654
+ ---
655
+
656
+ ## 📚 Additional Resources
657
+
658
+ - **OpenEnv Spec:** https://github.com/meta-pytorch/OpenEnv
659
+ - **OpenRouter Free Models:** https://openrouter.ai/keys
660
+ - **HF Spaces Docs:** https://huggingface.co/docs/hub/spaces
661
+ - **GridMind Repository:** https://github.com/LO-Kyu/gridmind-rl
662
+
663
+ ---
664
+
665
+ ## 📄 License
666
+
667
+ See `LICENSE` in the repository.
gridmind_rl.egg-info/SOURCES.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ gridmind_rl.egg-info/PKG-INFO
5
+ gridmind_rl.egg-info/SOURCES.txt
6
+ gridmind_rl.egg-info/dependency_links.txt
7
+ gridmind_rl.egg-info/entry_points.txt
8
+ gridmind_rl.egg-info/requires.txt
9
+ gridmind_rl.egg-info/top_level.txt
10
+ python/__init__.py
11
+ python/inference.py
12
+ python/models.py
13
+ python/validate.py
14
+ server/__init__.py
15
+ server/app.py
16
+ tests/test_graders.py
gridmind_rl.egg-info/dependency_links.txt ADDED
@@ -0,0 +1 @@
 
 
1
+
gridmind_rl.egg-info/entry_points.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ [console_scripts]
2
+ gridmind-server = server.app:main
gridmind_rl.egg-info/requires.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ openai>=1.0.0
2
+ openenv-core>=0.2.0
3
+ fastapi>=0.100.0
4
+ uvicorn>=0.23.0
5
+ pydantic>=2.0.0
6
+ requests>=2.31.0
7
+ httpx>=0.24.0
8
+ pytest>=7.0.0
9
+ python-dotenv>=1.0.0
10
+
11
+ [dev]
12
+ pytest>=7.0.0
13
+ pytest-cov>=4.0.0
14
+ black>=23.0.0
15
+ ruff>=0.1.0
gridmind_rl.egg-info/top_level.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ python
2
+ server
pyproject.toml ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [build-system]
2
+ requires = ["setuptools>=68.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "gridmind-rl"
7
+ version = "1.0.0"
8
+ description = "GridMind-RL: Industrial Load-Shaping and Demand-Response RL Environment. Control HVAC, thermal storage, and batch job scheduling under stochastic electricity prices and grid stress events."
9
+ readme = "README.md"
10
+ requires-python = ">=3.9"
11
+ license = {text = "MIT"}
12
+ authors = [
13
+ {name = "LOKyu Team"}
14
+ ]
15
+ keywords = ["reinforcement-learning", "openenv", "energy-management", "demand-response"]
16
+ classifiers = [
17
+ "Development Status :: 4 - Beta",
18
+ "Environment :: GPU",
19
+ "Intended Audience :: Science/Research",
20
+ "License :: OSI Approved :: MIT License",
21
+ "Natural Language :: English",
22
+ "Operating System :: OS Independent",
23
+ "Programming Language :: Python :: 3",
24
+ "Programming Language :: Python :: 3.9",
25
+ "Programming Language :: Python :: 3.10",
26
+ "Programming Language :: Python :: 3.11",
27
+ "Programming Language :: Python :: 3.12",
28
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
29
+ ]
30
+
31
+ dependencies = [
32
+ "openai>=1.0.0",
33
+ "openenv-core>=0.2.0",
34
+ "fastapi>=0.100.0",
35
+ "uvicorn>=0.23.0",
36
+ "pydantic>=2.0.0",
37
+ "requests>=2.31.0",
38
+ "httpx>=0.24.0",
39
+ "pytest>=7.0.0",
40
+ "python-dotenv>=1.0.0",
41
+ ]
42
+
43
+ [project.optional-dependencies]
44
+ dev = [
45
+ "pytest>=7.0.0",
46
+ "pytest-cov>=4.0.0",
47
+ "black>=23.0.0",
48
+ "ruff>=0.1.0",
49
+ ]
50
+
51
+ [project.urls]
52
+ Homepage = "https://github.com/meta-pytorch/OpenEnv"
53
+ Repository = "https://github.com/meta-pytorch/OpenEnv"
54
+ Documentation = "https://github.com/meta-pytorch/OpenEnv"
55
+
56
+ [project.scripts]
57
+ gridmind-server = "server.app:main"
58
+
59
+ [tool.setuptools]
60
+ packages = ["python", "server"]
61
+
62
+ [tool.black]
63
+ line-length = 100
64
+ target-version = ["py39", "py310", "py311"]
65
+
66
+ [tool.ruff]
67
+ line-length = 100
68
+ target-version = "py39"
python/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ """
2
+ GridMind-RL Python package
3
+ """
4
+
5
+ __version__ = "1.0.0"
python/requirements.txt CHANGED
@@ -6,3 +6,4 @@ requests>=2.31.0
6
  httpx>=0.24.0
7
  pytest>=7.0.0
8
  python-dotenv>=1.0.0
 
 
6
  httpx>=0.24.0
7
  pytest>=7.0.0
8
  python-dotenv>=1.0.0
9
+ openenv-core>=0.2.0
server/__init__.py ADDED
File without changes
server/app.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ GridMind-RL FastAPI Server Wrapper
3
+ Proxies requests to the Go environment server (or provides fallback endpoints)
4
+ """
5
+
6
+ from fastapi import FastAPI
7
+ from fastapi.responses import JSONResponse
8
+ import os
9
+ import httpx
10
+
11
+ app = FastAPI(title="GridMind-RL", version="1.0.0")
12
+
13
+ # Go server address (set via environment or default)
14
+ GO_SERVER_URL = os.getenv("GO_SERVER_URL", "http://localhost:8000")
15
+
16
+ # Timeout for Go server calls
17
+ TIMEOUT = 30
18
+
19
+
20
+ @app.get("/health")
21
+ async def health():
22
+ """Health check endpoint."""
23
+ try:
24
+ async with httpx.AsyncClient(timeout=TIMEOUT) as client:
25
+ resp = await client.get(f"{GO_SERVER_URL}/health")
26
+ return resp.json()
27
+ except Exception:
28
+ # Fallback response if Go server unreachable
29
+ return {"status": "ok", "mode": "python"}
30
+
31
+
32
+ @app.get("/state")
33
+ async def get_state():
34
+ """Get environment state."""
35
+ try:
36
+ async with httpx.AsyncClient(timeout=TIMEOUT) as client:
37
+ resp = await client.get(f"{GO_SERVER_URL}/state")
38
+ return resp.json()
39
+ except Exception as e:
40
+ return JSONResponse({"error": str(e)}, status_code=503)
41
+
42
+
43
+ @app.post("/reset")
44
+ async def reset(request: dict):
45
+ """Reset environment."""
46
+ try:
47
+ async with httpx.AsyncClient(timeout=TIMEOUT) as client:
48
+ resp = await client.post(f"{GO_SERVER_URL}/reset", json=request)
49
+ return resp.json()
50
+ except Exception as e:
51
+ return JSONResponse({"error": str(e)}, status_code=503)
52
+
53
+
54
+ @app.post("/step")
55
+ async def step(request: dict):
56
+ """Step environment."""
57
+ try:
58
+ async with httpx.AsyncClient(timeout=TIMEOUT) as client:
59
+ resp = await client.post(f"{GO_SERVER_URL}/step", json=request)
60
+ return resp.json()
61
+ except Exception as e:
62
+ return JSONResponse({"error": str(e)}, status_code=503)
63
+
64
+
65
+ @app.get("/grade")
66
+ async def grade():
67
+ """Grade environment."""
68
+ try:
69
+ async with httpx.AsyncClient(timeout=TIMEOUT) as client:
70
+ resp = await client.get(f"{GO_SERVER_URL}/grade")
71
+ return resp.json()
72
+ except Exception as e:
73
+ return JSONResponse({"error": str(e)}, status_code=503)
74
+
75
+
76
+ def main():
77
+ """Entry point for server."""
78
+ import uvicorn
79
+ port = int(os.getenv("PORT", 7860))
80
+ uvicorn.run(app, host="0.0.0.0", port=port)
81
+
82
+
83
+ if __name__ == "__main__":
84
+ main()