K446 committed on
Commit
1dfed79
·
1 Parent(s): db3026a

fix: unified Dockerfile with entrypoint for server/training mode

Browse files

- Single Dockerfile handles both UI server and GRPO training
- OPENGRID_MODE=training runs run_training.py
- OPENGRID_MODE=server (default) runs uvicorn
- Training Space env var OPENGRID_MODE=training already set via HF API

Files changed (2) hide show
  1. Dockerfile +16 -5
  2. entrypoint.sh +22 -0
Dockerfile CHANGED
@@ -1,5 +1,8 @@
1
  # Hugging Face Docker Space — OpenGrid
2
  # Docs: https://huggingface.co/docs/hub/spaces-sdks-docker
 
 
 
3
 
4
  FROM python:3.10-slim
5
 
@@ -14,19 +17,27 @@ ENV PATH="/home/user/.local/bin:$PATH"
14
 
15
  WORKDIR /app
16
 
17
- # Install dependencies
18
  COPY --chown=user requirements.txt .
19
- RUN pip install --no-cache-dir --upgrade -r requirements.txt
 
 
20
 
21
  # Copy application code
22
  COPY --chown=user . /app
23
 
 
 
 
 
 
 
24
  # Expose HF Spaces default port
25
  EXPOSE 7860
26
 
27
- # Healthcheck
28
  HEALTHCHECK --interval=30s --timeout=5s --start-period=15s \
29
  CMD python -c "import httpx; httpx.get('http://localhost:7860/health').raise_for_status()" || exit 1
30
 
31
- # Run the server
32
- CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
 
1
  # Hugging Face Docker Space — OpenGrid
2
  # Docs: https://huggingface.co/docs/hub/spaces-sdks-docker
3
+ #
4
+ # This Dockerfile serves both the UI Space and the Training Space.
5
+ # Set OPENGRID_MODE=training to run GRPO training instead of the server.
6
 
7
  FROM python:3.10-slim
8
 
 
17
 
WORKDIR /app

# Dependency manifests are copied on their own, ahead of the source tree, so
# the pip layer below is rebuilt only when one of them changes.
# Both the server and the training requirement sets are installed because the
# same image is used for either mode.
COPY --chown=user requirements.txt requirements-training.txt ./
RUN pip install --no-cache-dir --upgrade -r requirements.txt \
    && pip install --no-cache-dir --upgrade -r requirements-training.txt

# Application source (includes entrypoint.sh)
COPY --chown=user . /app

# The entrypoint script is launched directly by CMD, so it needs the exec bit
RUN chmod +x entrypoint.sh

# Mode selector read by entrypoint.sh; "server" unless overridden at run time
# with OPENGRID_MODE=training
ENV OPENGRID_MODE=server

# Port used by HF Spaces by default
EXPOSE 7860
37
 
# Healthcheck: probes the server's /health endpoint on port 7860.
# When OPENGRID_MODE=training the entrypoint starts run_training.py instead of
# uvicorn, so the probe is skipped and reports healthy rather than failing
# against a port nothing is expected to listen on.
# The check is written in shell form so $OPENGRID_MODE is expanded from the
# container's environment each time the probe runs.
HEALTHCHECK --interval=30s --timeout=5s --start-period=15s \
    CMD if [ "${OPENGRID_MODE:-server}" = "training" ]; then exit 0; fi; \
        python -c "import httpx; httpx.get('http://localhost:7860/health').raise_for_status()" || exit 1

# entrypoint.sh chooses between the server and training based on OPENGRID_MODE.
# The path is relative to WORKDIR (/app).
CMD ["./entrypoint.sh"]
entrypoint.sh ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
# OpenGrid container entrypoint.
#
# Picks the container's main process from the OPENGRID_MODE environment
# variable:
#   OPENGRID_MODE=training        -> runs the GRPO training pipeline
#   OPENGRID_MODE=server or unset -> runs the FastAPI UI server (default)
#
# Any other value also starts the server, but a warning is written to stderr
# first so that a misspelled mode shows up in the logs instead of being
# silently ignored.

set -e

MODE="${OPENGRID_MODE:-server}"
RULE="========================================"

case "$MODE" in
    training)
        echo "$RULE"
        echo " OpenGrid — GRPO Training Mode"
        echo "$RULE"
        # exec replaces this shell, so the training process receives signals directly
        exec python run_training.py
        ;;
    *)
        if [ "$MODE" != "server" ]; then
            echo "entrypoint.sh: unrecognized OPENGRID_MODE='$MODE'; falling back to server mode" >&2
        fi
        echo "$RULE"
        echo " OpenGrid — Control Room Server"
        echo "$RULE"
        # exec replaces this shell, so uvicorn receives signals directly
        exec uvicorn app:app --host 0.0.0.0 --port 7860
        ;;
esac