Spaces:
Sleeping
Sleeping
Commit ·
a4b12d5
1
Parent(s): ec4ae03
Fix Space Dockerfile and add requirements.space.txt
Browse files- Dockerfile +21 -118
- README.md +1 -1
- requirements.space.txt +3 -0
Dockerfile
CHANGED
|
@@ -1,130 +1,33 @@
|
|
| 1 |
-
# AxiomForgeAI —
|
| 2 |
# ─────────────────────────────────────────────────────────────────────────────
|
| 3 |
-
#
|
|
|
|
| 4 |
#
|
| 5 |
-
#
|
| 6 |
-
#
|
| 7 |
-
# PyTorch : 2.5.1+cu124 (pinned in requirements.txt)
|
| 8 |
-
# Flash-Attn : 2.8.3 (pinned in requirements.txt)
|
| 9 |
-
#
|
| 10 |
-
# All Python package versions are taken exclusively from requirements.txt.
|
| 11 |
-
# No versions are hard-coded in this file.
|
| 12 |
-
#
|
| 13 |
-
# ββ Build βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 14 |
-
# docker build -t axiomforgeai-train:latest .
|
| 15 |
-
#
|
| 16 |
-
# ββ Interactive shell βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 17 |
-
# docker run --gpus all --ipc=host --ulimit memlock=-1 \
|
| 18 |
-
# -v $(pwd)/data:/workspace/data \
|
| 19 |
-
# -v $(pwd)/checkpoints:/workspace/checkpoints \
|
| 20 |
-
# -v $(pwd)/logs:/workspace/logs \
|
| 21 |
-
# -it axiomforgeai-train:latest bash
|
| 22 |
-
#
|
| 23 |
-
# ββ GRPO training (one-shot) ββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 24 |
-
# docker run --gpus all --ipc=host --ulimit memlock=-1 \
|
| 25 |
-
# -v $(pwd)/data:/workspace/data \
|
| 26 |
-
# -v $(pwd)/checkpoints:/workspace/checkpoints \
|
| 27 |
-
# -v $(pwd)/logs:/workspace/logs \
|
| 28 |
-
# axiomforgeai-train:latest \
|
| 29 |
-
# python scripts/run_grpo_training.py \
|
| 30 |
-
# --base-model checkpoints/dual_task_v1 \
|
| 31 |
-
# --gsm8k-data data/sft/gsm8k_sft.jsonl \
|
| 32 |
-
# --num-iterations 30 --group-size 8 --questions-per-iter 16
|
| 33 |
# ─────────────────────────────────────────────────────────────────────────────
|
| 34 |
|
| 35 |
-
|
| 36 |
-
# fully compatible with the A100's CUDA 13.2 driver (driver is always ≥ toolkit).
|
| 37 |
-
FROM nvidia/cuda:12.4.1-devel-ubuntu22.04
|
| 38 |
-
|
| 39 |
-
LABEL org.opencontainers.image.title="AxiomForgeAI Training" \
|
| 40 |
-
cuda.driver.minimum="13.0" \
|
| 41 |
-
cuda.toolkit="12.4.1" \
|
| 42 |
-
torch.version="2.5.1+cu124" \
|
| 43 |
-
flash_attn.version="2.8.3"
|
| 44 |
-
|
| 45 |
-
# ββ System packages ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 46 |
-
ENV DEBIAN_FRONTEND=noninteractive
|
| 47 |
-
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 48 |
-
python3.11 \
|
| 49 |
-
python3.11-dev \
|
| 50 |
-
python3-pip \
|
| 51 |
-
python3.11-venv \
|
| 52 |
-
git \
|
| 53 |
-
git-lfs \
|
| 54 |
-
curl \
|
| 55 |
-
wget \
|
| 56 |
-
build-essential \
|
| 57 |
-
ninja-build \
|
| 58 |
-
pkg-config \
|
| 59 |
-
libssl-dev \
|
| 60 |
-
libffi-dev \
|
| 61 |
-
ca-certificates \
|
| 62 |
-
&& ln -sf /usr/bin/python3.11 /usr/bin/python3 \
|
| 63 |
-
&& ln -sf /usr/bin/python3 /usr/bin/python \
|
| 64 |
-
&& rm -rf /var/lib/apt/lists/*
|
| 65 |
-
|
| 66 |
-
# ββ Upgrade pip + build tooling βββββββββββββββββββββββββββββββββββββββββββββββ
|
| 67 |
-
RUN python -m pip install --upgrade --no-cache-dir pip setuptools wheel
|
| 68 |
|
| 69 |
-
#
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
# re-resolving to a different version when requirements.txt is processed next.
|
| 74 |
-
RUN pip install --no-cache-dir \
|
| 75 |
-
--extra-index-url https://download.pytorch.org/whl/cu124 \
|
| 76 |
-
"torch==2.5.1" "torchvision==0.20.1" "torchaudio==2.5.1"
|
| 77 |
|
| 78 |
-
|
| 79 |
-
# flash-attn, xformers, vllm, triton, bitsandbytes, transformers, accelerate,
|
| 80 |
-
# peft, ray, sympy, scipy, numpy, openenv-core, fastapi, uvicorn, … are all
|
| 81 |
-
# installed here at the exact versions pinned in requirements.txt.
|
| 82 |
-
# The cu124 index is provided so CUDA-linked wheels resolve correctly.
|
| 83 |
-
COPY requirements.txt /tmp/requirements.txt
|
| 84 |
-
RUN pip install --no-cache-dir \
|
| 85 |
-
--extra-index-url https://download.pytorch.org/whl/cu124 \
|
| 86 |
-
-r /tmp/requirements.txt
|
| 87 |
|
| 88 |
-
#
|
| 89 |
-
|
| 90 |
-
|
| 91 |
|
| 92 |
-
#
|
| 93 |
-
|
| 94 |
-
ENV PYTHONPATH="/workspace:$PYTHONPATH"
|
| 95 |
|
| 96 |
-
|
| 97 |
-
# -v /host/hf_cache:/workspace/.hf_cache
|
| 98 |
-
ENV HF_HOME="/workspace/.hf_cache"
|
| 99 |
-
ENV TRANSFORMERS_CACHE="/workspace/.hf_cache"
|
| 100 |
|
| 101 |
-
#
|
| 102 |
-
ENV
|
| 103 |
-
ENV NCCL_P2P_DISABLE=0
|
| 104 |
-
ENV NCCL_IB_DISABLE=0
|
| 105 |
-
# Required for Flash-Attn 2 with bfloat16 on Ampere
|
| 106 |
-
ENV TORCH_CUDNN_V8_API_ENABLED=1
|
| 107 |
|
| 108 |
-
|
| 109 |
-
# nvidia-smi is injected at runtime via --gpus, so this check runs when the
|
| 110 |
-
# container starts, not at build time.
|
| 111 |
-
RUN printf '%s\n' \
|
| 112 |
-
'#!/bin/sh' \
|
| 113 |
-
'if command -v nvidia-smi >/dev/null 2>&1; then' \
|
| 114 |
-
' CUDA_VER=$(nvidia-smi 2>/dev/null | grep -oP "CUDA Version: \K[0-9.]+" || echo "0.0")' \
|
| 115 |
-
' MAJOR=$(echo "$CUDA_VER" | cut -d. -f1)' \
|
| 116 |
-
' echo "[AxiomForgeAI] CUDA driver reports toolkit: $CUDA_VER"' \
|
| 117 |
-
' if [ "${MAJOR:-0}" -lt 13 ] 2>/dev/null; then' \
|
| 118 |
-
' echo "[ERROR] CUDA driver >= 13.0 required; detected $CUDA_VER. Upgrade your NVIDIA driver."' \
|
| 119 |
-
' exit 1' \
|
| 120 |
-
' fi' \
|
| 121 |
-
' echo "[AxiomForgeAI] CUDA $CUDA_VER >= 13.0 β OK"' \
|
| 122 |
-
'else' \
|
| 123 |
-
' echo "[WARNING] nvidia-smi not found β CUDA driver version check skipped."' \
|
| 124 |
-
'fi' \
|
| 125 |
-
'exec "$@"' \
|
| 126 |
-
> /usr/local/bin/entrypoint.sh \
|
| 127 |
-
&& chmod +x /usr/local/bin/entrypoint.sh
|
| 128 |
|
| 129 |
-
|
| 130 |
-
CMD ["bash"]
|
|
|
|
| 1 |
+
# AxiomForgeAI — Hugging Face Docker Space
|
| 2 |
# ─────────────────────────────────────────────────────────────────────────────
|
| 3 |
+
# CPU-only build that exposes the OpenEnv math RL environment server.
|
| 4 |
+
# Listens on port 7860 (required by Hugging Face Spaces).
|
| 5 |
#
|
| 6 |
+
# The heavy ML stack (torch, PRM, flash-attn) is NOT installed here.
|
| 7 |
+
# The environment falls back to SymPy-only scoring, which works on CPU.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
# ─────────────────────────────────────────────────────────────────────────────
|
| 9 |
|
| 10 |
+
FROM python:3.11-slim
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
+
# HF requires a non-root user
|
| 13 |
+
RUN useradd -m -u 1000 user
|
| 14 |
+
USER user
|
| 15 |
+
ENV PATH="/home/user/.local/bin:$PATH"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
+
WORKDIR /app
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
+
# Minimal runtime dependencies only (no torch/cuda/flash-attn)
|
| 20 |
+
COPY --chown=user requirements.space.txt requirements.txt
|
| 21 |
+
RUN pip install --no-cache-dir --upgrade -r requirements.txt
|
| 22 |
|
| 23 |
+
# Copy project source
|
| 24 |
+
COPY --chown=user . /app
|
|
|
|
| 25 |
|
| 26 |
+
ENV PYTHONPATH="/app:$PYTHONPATH"
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
+
# Disable HF telemetry inside the Space
|
| 29 |
+
ENV HF_HUB_DISABLE_TELEMETRY=1
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
+
EXPOSE 7860
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
+
CMD ["uvicorn", "server.app:app", "--host", "0.0.0.0", "--port", "7860"]
|
|
|
README.md
CHANGED
|
@@ -5,7 +5,7 @@ colorFrom: indigo
|
|
| 5 |
colorTo: pink
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
-
app_port:
|
| 9 |
base_path: /web
|
| 10 |
tags:
|
| 11 |
- openenv
|
|
|
|
| 5 |
colorTo: pink
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
+
app_port: 7860
|
| 9 |
base_path: /web
|
| 10 |
tags:
|
| 11 |
- openenv
|
requirements.space.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
openenv[core]>=0.2.0
|
| 2 |
+
fastapi>=0.115.0
|
| 3 |
+
uvicorn>=0.24.0
|