Spaces:

K446
/

Opengrid

Running

App Files Files Community

K446 committed 12 days ago

Commit

bcce6af

1 Parent(s): 69bab30

GRPO training with CUDA + results in UI

Browse files

Files changed (5) hide show

Dockerfile +13 -3
_fix_notebook.py +75 -0
app.py +39 -1
requirements-training.txt +1 -2
run_training.py +7 -41

Dockerfile CHANGED Viewed

@@ -2,12 +2,19 @@
 # Serves both the UI dashboard AND GRPO training.
 # Set env OPENGRID_MODE=training for training mode.
-FROM python:3.10-slim
 LABEL org.opencontainers.image.title="OpenGrid"
 LABEL org.opencontainers.image.description="Renewable energy grid load-balancing environment"
 LABEL openenv="true"
 RUN useradd -m -u 1000 user
 USER user
 ENV PATH="/home/user/.local/bin:$PATH"
@@ -19,9 +26,12 @@ WORKDIR /app
 COPY --chown=user requirements.txt .
 RUN pip install --no-cache-dir --upgrade -r requirements.txt
 # Install training deps (only re-runs if training reqs change)
 COPY --chown=user requirements-training.txt .
-RUN pip install --no-cache-dir --upgrade -r requirements-training.txt
 # --- Application code (selective COPY for lean images) ---
 # Core Python modules
@@ -47,7 +57,7 @@ RUN chmod +x entrypoint.sh
 # server = FastAPI UI, training = GRPO pipeline
 EXPOSE 7860
-HEALTHCHECK --interval=30s --timeout=5s --start-period=15s \
     CMD python -c "import httpx; httpx.get('http://localhost:7860/health').raise_for_status()" || exit 1
 CMD ["./entrypoint.sh"]

 # Serves both the UI dashboard AND GRPO training.
 # Set env OPENGRID_MODE=training for training mode.
+FROM nvidia/cuda:12.1.1-runtime-ubuntu22.04
 LABEL org.opencontainers.image.title="OpenGrid"
 LABEL org.opencontainers.image.description="Renewable energy grid load-balancing environment"
 LABEL openenv="true"
+# Install Python 3.10
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    python3.10 python3-pip python3.10-venv && \
+    ln -sf /usr/bin/python3.10 /usr/bin/python && \
+    ln -sf /usr/bin/pip3 /usr/bin/pip && \
+    rm -rf /var/lib/apt/lists/*
 RUN useradd -m -u 1000 user
 USER user
 ENV PATH="/home/user/.local/bin:$PATH"
 COPY --chown=user requirements.txt .
 RUN pip install --no-cache-dir --upgrade -r requirements.txt
+# Install PyTorch with CUDA support (must come before training deps)
+RUN pip install --no-cache-dir torch==2.5.1 --extra-index-url https://download.pytorch.org/whl/cu121
 # Install training deps (only re-runs if training reqs change)
 COPY --chown=user requirements-training.txt .
+RUN pip install --no-cache-dir --upgrade --no-deps -r requirements-training.txt
 # --- Application code (selective COPY for lean images) ---
 # Core Python modules
 # server = FastAPI UI, training = GRPO pipeline
 EXPOSE 7860
+HEALTHCHECK --interval=60s --timeout=10s --start-period=600s \
     CMD python -c "import httpx; httpx.get('http://localhost:7860/health').raise_for_status()" || exit 1
 CMD ["./entrypoint.sh"]

_fix_notebook.py ADDED Viewed

	@@ -0,0 +1,75 @@

+"""Update the notebook: fix rewards, hyperparams, remove emojis, show plots inline."""
+import json
+# Read the notebook through a context manager so the file handle is closed
+# as soon as parsing finishes instead of being left to garbage collection.
+with open('training/opengrid_grpo_colab.ipynb', encoding='utf-8') as nb_file:
+    nb = json.load(nb_file)
+# Remove emojis from all cells.
+# A notebook cell's "source" may be stored either as a list of lines or as a
+# single string; handle both so a string source is cleaned as a whole instead
+# of being enumerated character by character (which fails on item assignment).
+EMOJIS = ['🔋','⚡','🚀','📊','✅','⚠️']
+for cell in nb['cells']:
+    source = cell.get('source', [])
+    if isinstance(source, str):
+        for emoji in EMOJIS:
+            source = source.replace(emoji, '')
+        cell['source'] = source
+        continue
+    # List form: rewrite each line in place (same list object the cell holds).
+    for i, line in enumerate(source):
+        for emoji in EMOJIS:
+            line = line.replace(emoji, '')
+        source[i] = line
+# Fix Cell 8: use compute_grpo_reward_env
+# Replacement source for the first cell that both imports compute_grpo_reward
+# and defines reward_fn; only that one cell is rewritten.
+reward_cell_source = [
+    'import json as _json\n',
+    'from training.train_grpo import compute_grpo_reward_env, extract_action\n',
+    '\n',
+    'def reward_fn(completions, obs_context=None, **kwargs):\n',
+    '    """GRPO reward function with env-grounded physics rewards."""\n',
+    '    texts = []\n',
+    '    for c in completions:\n',
+    '        if isinstance(c, list):\n',
+    '            text = c[-1]["content"] if c else ""\n',
+    '        else:\n',
+    '            text = str(c)\n',
+    '        texts.append(text)\n',
+    '\n',
+    '    if obs_context is None:\n',
+    '        batch_obs = [None] * len(texts)\n',
+    '    else:\n',
+    '        batch_obs = [\n',
+    '            _json.loads(ctx) if isinstance(ctx, str) else ctx\n',
+    '            for ctx in obs_context\n',
+    '        ]\n',
+    '    return compute_grpo_reward_env(texts, batch_obs, task_config, horizon=3)\n',
+    '\n',
+    '# Sanity test\n',
+    'test_rewards = reward_fn([\n',
+    '    \'{"bus_adjustments": [{"bus_id": 1, "delta": 5.0}], "topology_actions": []}\',\n',
+    '    "invalid json here",\n',
+    '])\n',
+    'print(f"Test rewards: {test_rewards}")\n',
+    'assert len(test_rewards) == 2\n',
+    'print("[OK] reward_fn works")\n',
+]
+for nb_cell in nb['cells']:
+    cell_text = ''.join(nb_cell.get('source', []))
+    if 'compute_grpo_reward,' not in cell_text or 'def reward_fn' not in cell_text:
+        continue
+    nb_cell['source'] = reward_cell_source
+    break
+# Fix Cell 9: update hyperparameters
+# (old, new) text substitutions, applied in this order to the first cell that
+# mentions both GRPOConfig( and num_generations.
+hyperparam_swaps = (
+    ('num_train_epochs=1', 'num_train_epochs=3'),
+    ('gradient_accumulation_steps=4', 'gradient_accumulation_steps=8'),
+    ('learning_rate=5e-6', 'learning_rate=1e-5'),
+    ('num_generations=4', 'num_generations=8'),
+)
+for nb_cell in nb['cells']:
+    cell_text = ''.join(nb_cell.get('source', []))
+    if not ('GRPOConfig(' in cell_text and 'num_generations' in cell_text):
+        continue
+    for old_text, new_text in hyperparam_swaps:
+        cell_text = cell_text.replace(old_text, new_text)
+    # Store the result back as a list of lines, keeping line endings.
+    nb_cell['source'] = cell_text.splitlines(True)
+    break
+# Fix download cell: replace google.colab with inline display
+# Source that shows both training plots inline; it replaces the first cell
+# whose text mentions google.colab.
+inline_plot_source = [
+    '# Display plots inline\n',
+    'from IPython.display import Image, display\n',
+    'display(Image("training/outputs/before_after.png"))\n',
+    'display(Image("training/outputs/training_loss.png"))\n',
+]
+for nb_cell in nb['cells']:
+    if 'google.colab' not in ''.join(nb_cell.get('source', [])):
+        continue
+    nb_cell['source'] = inline_plot_source
+    break
+# Write through a context manager so the notebook is fully flushed and the
+# handle closed before success is reported.
+with open('training/opengrid_grpo_colab.ipynb', 'w', encoding='utf-8') as nb_file:
+    json.dump(nb, nb_file, indent=1)
+print("Notebook updated successfully")

app.py CHANGED Viewed

@@ -413,4 +413,42 @@ def visualize(session_id: str):
             hist = list(history.get(session_id, []))
     img_str = generate_dashboard(hist, obs)
-    return {"image_base64": img_str}

             hist = list(history.get(session_id, []))
     img_str = generate_dashboard(hist, obs)
+    return {"image_base64": img_str}
+# ===========================================================================
+# Training Results
+# ===========================================================================
+@app.get("/training-results")
+def training_results():
+    """Return GRPO training results if available.
+
+    Returns:
+        ``{"available": False}`` when ``training/outputs/summary.json`` does
+        not exist. ``{"available": True, "error": ...}`` when the summary
+        contains an ``"error"`` key, cannot be read or parsed, or is not a JSON
+        object. Otherwise the summary contents with ``"available": True`` and a
+        ``"plots"`` mapping of plot name to URL for each plot PNG that exists.
+    """
+    summary_path = pathlib.Path("training/outputs/summary.json")
+    if not summary_path.exists():
+        return {"available": False}
+    try:
+        with open(summary_path, encoding="utf-8") as f:
+            data = json.load(f)
+    except (OSError, ValueError) as exc:
+        # json.JSONDecodeError is a ValueError. Report an unreadable or
+        # truncated summary in the same shape as a training error instead of
+        # letting the request fail with an unhandled exception.
+        return {"available": True, "error": f"Could not read training summary: {exc}"}
+    if not isinstance(data, dict):
+        # The code below needs key lookup and item assignment on a mapping.
+        return {"available": True, "error": "Training summary is not a JSON object"}
+    # Check if it was an error
+    if "error" in data:
+        return {"available": True, "error": data["error"]}
+    # Add plot URLs
+    data["available"] = True
+    data["plots"] = {}
+    for name in ["before_after", "training_loss"]:
+        p = pathlib.Path(f"training/outputs/{name}.png")
+        if p.exists():
+            data["plots"][name] = f"/training-plots/{name}"
+    return data
+@app.get("/training-plots/{name}")
+def training_plot(name: str):
+    """Serve a training plot image.
+
+    Only the names ``before_after`` and ``training_loss`` are accepted; any
+    other name, or an accepted name whose PNG is not on disk, raises a 404.
+    """
+    from fastapi.responses import FileResponse
+    if name not in ("before_after", "training_loss"):
+        raise HTTPException(404, "Plot not found")
+    plot_path = pathlib.Path("training/outputs") / f"{name}.png"
+    if plot_path.exists():
+        return FileResponse(str(plot_path), media_type="image/png")
+    raise HTTPException(404, "Plot not generated yet")

requirements-training.txt CHANGED Viewed

@@ -1,5 +1,4 @@
-# Training dependencies
-torch
 transformers>=4.51.3
 trl>=0.12.0,<1.0
 peft>=0.13.0

+# Training dependencies (torch installed separately in Dockerfile with CUDA)
 transformers>=4.51.3
 trl>=0.12.0,<1.0
 peft>=0.13.0

run_training.py CHANGED Viewed

@@ -330,44 +330,6 @@ def run_grpo_training():
     return summary
-# ── Results Server ────────────────────────────────────────────────
-def serve_results():
-    """Serve training results on port 7860."""
-    from fastapi import FastAPI
-    from fastapi.responses import FileResponse, JSONResponse
-    import uvicorn
-    app = FastAPI(title="OpenGrid Training Results")
-    @app.get("/")
-    def root():
-        summary_path = Path("training/outputs/summary.json")
-        if summary_path.exists():
-            with open(summary_path) as f:
-                return json.load(f)
-        return {"status": "Training in progress or no results yet"}
-    @app.get("/plots/before_after")
-    def before_after():
-        p = Path("training/outputs/before_after.png")
-        if p.exists():
-            return FileResponse(str(p), media_type="image/png")
-        return JSONResponse({"error": "not ready"}, status_code=404)
-    @app.get("/plots/loss")
-    def loss():
-        p = Path("training/outputs/training_loss.png")
-        if p.exists():
-            return FileResponse(str(p), media_type="image/png")
-        return JSONResponse({"error": "not ready"}, status_code=404)
-    @app.get("/health")
-    def health():
-        return {"status": "ok"}
-    uvicorn.run(app, host="0.0.0.0", port=7860)
 # ── Main ──────────────────────────────────────────────────────────
 if __name__ == "__main__":
     try:
@@ -375,10 +337,14 @@ if __name__ == "__main__":
     except Exception as e:
         print(f"\nERROR during training: {e}")
         traceback.print_exc()
-        # Save error so the results server can report it
         os.makedirs("training/outputs", exist_ok=True)
         with open("training/outputs/summary.json", "w") as f:
             json.dump({"error": str(e)}, f)
-    print("\nStarting results server on port 7860...")
-    serve_results()

     return summary
 # ── Main ──────────────────────────────────────────────────────────
 if __name__ == "__main__":
     try:
     except Exception as e:
         print(f"\nERROR during training: {e}")
         traceback.print_exc()
+        # Save error so the UI can report it
         os.makedirs("training/outputs", exist_ok=True)
         with open("training/outputs/summary.json", "w") as f:
             json.dump({"error": str(e)}, f)
+    # Start the full UI server (not a mini results server)
+    # This serves the control room + training results on port 7860
+    print("\nTraining done. Starting full UI server on port 7860...")
+    import uvicorn
+    from app import app
+    uvicorn.run(app, host="0.0.0.0", port=7860)