Imsachin010 committed on
Commit
c783ce8
·
1 Parent(s): 876b380

Automate 7B Training using Hugging Face Space Dockerfile

Browse files
Dockerfile CHANGED
@@ -19,9 +19,13 @@ RUN pip install --no-cache-dir -r requirements.txt
19
  # Copy the salespath_env package
20
  COPY salespath_env/ ./salespath_env/
21
 
 
 
 
 
22
  # Health check
23
  HEALTHCHECK --interval=30s --timeout=5s --start-period=15s --retries=3 \
24
  CMD curl -f http://localhost:${PORT}/health || exit 1
25
 
26
- # Start the FastAPI server on HF Spaces port
27
- CMD ["sh", "-c", "uvicorn salespath_env.server.app:app --host 0.0.0.0 --port ${PORT}"]
 
19
  # Copy the salespath_env package
20
  COPY salespath_env/ ./salespath_env/
21
 
22
+ # Copy and set permissions for the training script
23
+ COPY run_hf_training.sh ./run_hf_training.sh
24
+ RUN chmod +x ./run_hf_training.sh
25
+
26
  # Health check
27
  HEALTHCHECK --interval=30s --timeout=5s --start-period=15s --retries=3 \
28
  CMD curl -f http://localhost:${PORT}/health || exit 1
29
 
30
+ # Execute the training script
31
+ CMD ["./run_hf_training.sh"]
requirements.txt CHANGED
@@ -2,3 +2,9 @@ fastapi>=0.110.0
2
  uvicorn[standard]>=0.29.0
3
  pydantic>=2.0
4
  openenv-core>=0.2.3
 
 
 
 
 
 
 
2
  uvicorn[standard]>=0.29.0
3
  pydantic>=2.0
4
  openenv-core>=0.2.3
5
+ transformers
6
+ datasets
7
+ trl
8
+ peft
9
+ httpx
10
+ matplotlib
run_hf_training.sh ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+
3
+ # Start the environment server in the background
4
+ echo "Starting SalesPath environment server..."
5
+ uvicorn salespath_env.server.app:app --host 0.0.0.0 --port 8000 &
6
+
7
+ # Give the server a few seconds to start up completely
8
+ sleep 5
9
+
10
+ # Start the GRPO Training using standard HuggingFace (PEFT)
11
+ echo "Starting 7B GRPO Training..."
12
+ PYTORCH_ALLOC_CONF=expandable_segments:True python -m training.grpo_train \
13
+ --mode grpo \
14
+ --model-name Qwen/Qwen2.5-7B-Instruct \
15
+ --grpo-steps 150 \
16
+ --grpo-dataset-size 128 \
17
+ --num-generations 4 \
18
+ --max-completion-length 256 \
19
+ --per-device-train-batch-size 4 \
20
+ --gradient-accumulation-steps 8 \
21
+ --output-dir ./salespath_out \
22
+ --logging-steps 10 \
23
+ --push-to-hub \
24
+ --hub-repo Imsachin010/salespath-qwen25-7b
25
+
26
+ echo "Training complete and pushed to hub! Keeping container alive for logs..."
27
+ tail -f /dev/null
training/colab_train.ipynb CHANGED
@@ -11,18 +11,198 @@
11
  },
12
  {
13
  "cell_type": "code",
 
14
  "metadata": {},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  "source": [
16
  "!pip install -U pip\n",
17
  "!pip uninstall -y openenv || true\n",
18
  "!pip install fastapi uvicorn pydantic httpx torch transformers trl unsloth openenv-core datasets pyarrow"
19
- ],
20
- "execution_count": null,
21
- "outputs": []
22
  },
23
  {
24
  "cell_type": "code",
 
25
  "metadata": {},
 
26
  "source": [
27
  "# If the repo is not already present, clone it.\n",
28
  "# !git clone https://github.com/<your-org-or-user>/salespath_env.git\n",
@@ -31,13 +211,13 @@
31
  "%cd /content/salespath_env\n",
32
  "!pip install -e .\n",
33
  "!python -c \"import salespath_env; print('salespath_env import OK')\""
34
- ],
35
- "execution_count": null,
36
- "outputs": []
37
  },
38
  {
39
  "cell_type": "code",
 
40
  "metadata": {},
 
41
  "source": [
42
  "# Verify OpenEnv core import first.\n",
43
  "!python -c \"import openenv.core; print('openenv.core import OK')\"\n",
@@ -46,29 +226,27 @@
46
  "!nohup python -m uvicorn salespath_env.server.app:app --host 0.0.0.0 --port 8000 > /content/server.log 2>&1 &\n",
47
  "!sleep 3\n",
48
  "!python -c \"import httpx; r=httpx.get('http://127.0.0.1:8000/health', timeout=30); print(r.status_code, r.text)\""
49
- ],
50
- "execution_count": null,
51
- "outputs": []
52
  },
53
  {
54
  "cell_type": "code",
 
55
  "metadata": {},
 
56
  "source": [
57
  "# Rollout smoke test (single episode)\n",
58
  "!python -m training.test_rollout"
59
- ],
60
- "execution_count": null,
61
- "outputs": []
62
  },
63
  {
64
  "cell_type": "code",
 
65
  "metadata": {},
 
66
  "source": [
67
  "# Curriculum run (example)\n",
68
  "!python -m training.grpo_train --steps 30 --env-url http://127.0.0.1:8000 --model-name Qwen/Qwen2.5-0.5B-Instruct"
69
- ],
70
- "execution_count": null,
71
- "outputs": []
72
  },
73
  {
74
  "cell_type": "markdown",
@@ -93,14 +271,23 @@
93
  ],
94
  "metadata": {
95
  "kernelspec": {
96
- "display_name": "Python 3",
97
  "language": "python",
98
  "name": "python3"
99
  },
100
  "language_info": {
101
- "name": "python"
 
 
 
 
 
 
 
 
 
102
  }
103
  },
104
  "nbformat": 4,
105
  "nbformat_minor": 5
106
- }
 
11
  },
12
  {
13
  "cell_type": "code",
14
+ "execution_count": 1,
15
  "metadata": {},
16
+ "outputs": [
17
+ {
18
+ "name": "stdout",
19
+ "output_type": "stream",
20
+ "text": [
21
+ "Requirement already satisfied: pip in /usr/local/lib/python3.12/dist-packages (26.0.1)\n",
22
+ "Found existing installation: openenv 0.1.13\n",
23
+ "Uninstalling openenv-0.1.13:\n",
24
+ " Successfully uninstalled openenv-0.1.13\n",
25
+ "Requirement already satisfied: fastapi in /usr/local/lib/python3.12/dist-packages (0.135.3)\n",
26
+ "Requirement already satisfied: uvicorn in /usr/local/lib/python3.12/dist-packages (0.44.0)\n",
27
+ "Requirement already satisfied: pydantic in /usr/local/lib/python3.12/dist-packages (2.12.3)\n",
28
+ "Requirement already satisfied: httpx in /usr/local/lib/python3.12/dist-packages (0.28.1)\n",
29
+ "Requirement already satisfied: torch in /usr/local/lib/python3.12/dist-packages (2.10.0+cu128)\n",
30
+ "Requirement already satisfied: transformers in /usr/local/lib/python3.12/dist-packages (4.57.2)\n",
31
+ "Requirement already satisfied: trl in /usr/local/lib/python3.12/dist-packages (0.23.0)\n",
32
+ "Requirement already satisfied: unsloth in /usr/local/lib/python3.12/dist-packages (2025.11.1)\n",
33
+ "Requirement already satisfied: openenv-core in /usr/local/lib/python3.12/dist-packages (0.2.3)\n",
34
+ "Requirement already satisfied: datasets in /usr/local/lib/python3.12/dist-packages (4.3.0)\n",
35
+ "Requirement already satisfied: pyarrow in /usr/local/lib/python3.12/dist-packages (24.0.0)\n",
36
+ "Requirement already satisfied: starlette>=0.46.0 in /usr/local/lib/python3.12/dist-packages (from fastapi) (0.52.1)\n",
37
+ "Requirement already satisfied: typing-extensions>=4.8.0 in /usr/local/lib/python3.12/dist-packages (from fastapi) (4.15.0)\n",
38
+ "Requirement already satisfied: typing-inspection>=0.4.2 in /usr/local/lib/python3.12/dist-packages (from fastapi) (0.4.2)\n",
39
+ "Requirement already satisfied: annotated-doc>=0.0.2 in /usr/local/lib/python3.12/dist-packages (from fastapi) (0.0.4)\n",
40
+ "Requirement already satisfied: click>=7.0 in /usr/local/lib/python3.12/dist-packages (from uvicorn) (8.3.2)\n",
41
+ "Requirement already satisfied: h11>=0.8 in /usr/local/lib/python3.12/dist-packages (from uvicorn) (0.16.0)\n",
42
+ "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.12/dist-packages (from pydantic) (0.7.0)\n",
43
+ "Requirement already satisfied: pydantic-core==2.41.4 in /usr/local/lib/python3.12/dist-packages (from pydantic) (2.41.4)\n",
44
+ "Requirement already satisfied: anyio in /usr/local/lib/python3.12/dist-packages (from httpx) (4.13.0)\n",
45
+ "Requirement already satisfied: certifi in /usr/local/lib/python3.12/dist-packages (from httpx) (2026.2.25)\n",
46
+ "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.12/dist-packages (from httpx) (1.0.9)\n",
47
+ "Requirement already satisfied: idna in /usr/local/lib/python3.12/dist-packages (from httpx) (3.11)\n",
48
+ "Requirement already satisfied: filelock in /usr/local/lib/python3.12/dist-packages (from torch) (3.25.2)\n",
49
+ "Requirement already satisfied: setuptools in /usr/local/lib/python3.12/dist-packages (from torch) (75.2.0)\n",
50
+ "Requirement already satisfied: sympy>=1.13.3 in /usr/local/lib/python3.12/dist-packages (from torch) (1.14.0)\n",
51
+ "Requirement already satisfied: networkx>=2.5.1 in /usr/local/lib/python3.12/dist-packages (from torch) (3.6.1)\n",
52
+ "Requirement already satisfied: jinja2 in /usr/local/lib/python3.12/dist-packages (from torch) (3.1.6)\n",
53
+ "Requirement already satisfied: fsspec>=0.8.5 in /usr/local/lib/python3.12/dist-packages (from torch) (2025.3.0)\n",
54
+ "Requirement already satisfied: cuda-bindings==12.9.4 in /usr/local/lib/python3.12/dist-packages (from torch) (12.9.4)\n",
55
+ "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.8.93 in /usr/local/lib/python3.12/dist-packages (from torch) (12.8.93)\n",
56
+ "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.8.90 in /usr/local/lib/python3.12/dist-packages (from torch) (12.8.90)\n",
57
+ "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.8.90 in /usr/local/lib/python3.12/dist-packages (from torch) (12.8.90)\n",
58
+ "Requirement already satisfied: nvidia-cudnn-cu12==9.10.2.21 in /usr/local/lib/python3.12/dist-packages (from torch) (9.10.2.21)\n",
59
+ "Requirement already satisfied: nvidia-cublas-cu12==12.8.4.1 in /usr/local/lib/python3.12/dist-packages (from torch) (12.8.4.1)\n",
60
+ "Requirement already satisfied: nvidia-cufft-cu12==11.3.3.83 in /usr/local/lib/python3.12/dist-packages (from torch) (11.3.3.83)\n",
61
+ "Requirement already satisfied: nvidia-curand-cu12==10.3.9.90 in /usr/local/lib/python3.12/dist-packages (from torch) (10.3.9.90)\n",
62
+ "Requirement already satisfied: nvidia-cusolver-cu12==11.7.3.90 in /usr/local/lib/python3.12/dist-packages (from torch) (11.7.3.90)\n",
63
+ "Requirement already satisfied: nvidia-cusparse-cu12==12.5.8.93 in /usr/local/lib/python3.12/dist-packages (from torch) (12.5.8.93)\n",
64
+ "Requirement already satisfied: nvidia-cusparselt-cu12==0.7.1 in /usr/local/lib/python3.12/dist-packages (from torch) (0.7.1)\n",
65
+ "Requirement already satisfied: nvidia-nccl-cu12==2.27.5 in /usr/local/lib/python3.12/dist-packages (from torch) (2.27.5)\n",
66
+ "Requirement already satisfied: nvidia-nvshmem-cu12==3.4.5 in /usr/local/lib/python3.12/dist-packages (from torch) (3.4.5)\n",
67
+ "Requirement already satisfied: nvidia-nvtx-cu12==12.8.90 in /usr/local/lib/python3.12/dist-packages (from torch) (12.8.90)\n",
68
+ "Requirement already satisfied: nvidia-nvjitlink-cu12==12.8.93 in /usr/local/lib/python3.12/dist-packages (from torch) (12.8.93)\n",
69
+ "Requirement already satisfied: nvidia-cufile-cu12==1.13.1.3 in /usr/local/lib/python3.12/dist-packages (from torch) (1.13.1.3)\n",
70
+ "Requirement already satisfied: triton==3.6.0 in /usr/local/lib/python3.12/dist-packages (from torch) (3.6.0)\n",
71
+ "Requirement already satisfied: cuda-pathfinder~=1.1 in /usr/local/lib/python3.12/dist-packages (from cuda-bindings==12.9.4->torch) (1.5.2)\n",
72
+ "Requirement already satisfied: huggingface-hub<1.0,>=0.34.0 in /usr/local/lib/python3.12/dist-packages (from transformers) (0.36.2)\n",
73
+ "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.12/dist-packages (from transformers) (2.0.2)\n",
74
+ "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.12/dist-packages (from transformers) (26.0)\n",
75
+ "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.12/dist-packages (from transformers) (6.0.3)\n",
76
+ "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.12/dist-packages (from transformers) (2025.11.3)\n",
77
+ "Requirement already satisfied: requests in /usr/local/lib/python3.12/dist-packages (from transformers) (2.32.4)\n",
78
+ "Requirement already satisfied: tokenizers<=0.23.0,>=0.22.0 in /usr/local/lib/python3.12/dist-packages (from transformers) (0.22.2)\n",
79
+ "Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.12/dist-packages (from transformers) (0.7.0)\n",
80
+ "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.12/dist-packages (from transformers) (4.67.3)\n",
81
+ "Requirement already satisfied: hf-xet<2.0.0,>=1.1.3 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub<1.0,>=0.34.0->transformers) (1.4.3)\n",
82
+ "Requirement already satisfied: accelerate>=1.4.0 in /usr/local/lib/python3.12/dist-packages (from trl) (1.13.0)\n",
83
+ "Requirement already satisfied: unsloth_zoo>=2025.11.1 in /usr/local/lib/python3.12/dist-packages (from unsloth) (2026.4.9)\n",
84
+ "Requirement already satisfied: wheel>=0.42.0 in /usr/local/lib/python3.12/dist-packages (from unsloth) (0.46.3)\n",
85
+ "Requirement already satisfied: torchvision in /usr/local/lib/python3.12/dist-packages (from unsloth) (0.25.0+cu128)\n",
86
+ "Requirement already satisfied: psutil in /usr/local/lib/python3.12/dist-packages (from unsloth) (5.9.5)\n",
87
+ "Requirement already satisfied: tyro in /usr/local/lib/python3.12/dist-packages (from unsloth) (1.0.13)\n",
88
+ "Requirement already satisfied: protobuf in /usr/local/lib/python3.12/dist-packages (from unsloth) (5.29.6)\n",
89
+ "Requirement already satisfied: xformers>=0.0.27.post2 in /usr/local/lib/python3.12/dist-packages (from unsloth) (0.0.35)\n",
90
+ "Requirement already satisfied: bitsandbytes!=0.46.0,!=0.48.0,>=0.45.5 in /usr/local/lib/python3.12/dist-packages (from unsloth) (0.49.2)\n",
91
+ "Requirement already satisfied: sentencepiece>=0.2.0 in /usr/local/lib/python3.12/dist-packages (from unsloth) (0.2.1)\n",
92
+ "Requirement already satisfied: peft!=0.11.0,>=0.7.1 in /usr/local/lib/python3.12/dist-packages (from unsloth) (0.18.1)\n",
93
+ "Requirement already satisfied: hf_transfer in /usr/local/lib/python3.12/dist-packages (from unsloth) (0.1.9)\n",
94
+ "Requirement already satisfied: diffusers in /usr/local/lib/python3.12/dist-packages (from unsloth) (0.37.1)\n",
95
+ "Requirement already satisfied: typer>=0.9.0 in /usr/local/lib/python3.12/dist-packages (from openenv-core) (0.24.1)\n",
96
+ "Requirement already satisfied: rich>=13.0.0 in /usr/local/lib/python3.12/dist-packages (from openenv-core) (13.9.4)\n",
97
+ "Requirement already satisfied: openai>=2.7.2 in /usr/local/lib/python3.12/dist-packages (from openenv-core) (2.31.0)\n",
98
+ "Requirement already satisfied: tomli>=2.3.0 in /usr/local/lib/python3.12/dist-packages (from openenv-core) (2.4.1)\n",
99
+ "Requirement already satisfied: tomli-w>=1.2.0 in /usr/local/lib/python3.12/dist-packages (from openenv-core) (1.2.0)\n",
100
+ "Requirement already satisfied: websockets>=15.0.1 in /usr/local/lib/python3.12/dist-packages (from openenv-core) (15.0.1)\n",
101
+ "Requirement already satisfied: fastmcp>=3.0.0 in /usr/local/lib/python3.12/dist-packages (from openenv-core) (3.2.4)\n",
102
+ "Requirement already satisfied: gradio>=4.0.0 in /usr/local/lib/python3.12/dist-packages (from openenv-core) (5.50.0)\n",
103
+ "Requirement already satisfied: dill<0.4.1,>=0.3.0 in /usr/local/lib/python3.12/dist-packages (from datasets) (0.3.8)\n",
104
+ "Requirement already satisfied: pandas in /usr/local/lib/python3.12/dist-packages (from datasets) (2.2.2)\n",
105
+ "Requirement already satisfied: xxhash in /usr/local/lib/python3.12/dist-packages (from datasets) (3.6.0)\n",
106
+ "Requirement already satisfied: multiprocess<0.70.17 in /usr/local/lib/python3.12/dist-packages (from datasets) (0.70.16)\n",
107
+ "Requirement already satisfied: aiohttp!=4.0.0a0,!=4.0.0a1 in /usr/local/lib/python3.12/dist-packages (from fsspec[http]<=2025.9.0,>=2023.1.0->datasets) (3.13.5)\n",
108
+ "Requirement already satisfied: aiohappyeyeballs>=2.5.0 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.9.0,>=2023.1.0->datasets) (2.6.1)\n",
109
+ "Requirement already satisfied: aiosignal>=1.4.0 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.9.0,>=2023.1.0->datasets) (1.4.0)\n",
110
+ "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.9.0,>=2023.1.0->datasets) (26.1.0)\n",
111
+ "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.9.0,>=2023.1.0->datasets) (1.8.0)\n",
112
+ "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.9.0,>=2023.1.0->datasets) (6.7.1)\n",
113
+ "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.9.0,>=2023.1.0->datasets) (0.4.1)\n",
114
+ "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.9.0,>=2023.1.0->datasets) (1.23.0)\n",
115
+ "Requirement already satisfied: authlib>=1.6.5 in /usr/local/lib/python3.12/dist-packages (from fastmcp>=3.0.0->openenv-core) (1.6.9)\n",
116
+ "Requirement already satisfied: cyclopts>=4.0.0 in /usr/local/lib/python3.12/dist-packages (from fastmcp>=3.0.0->openenv-core) (4.11.0)\n",
117
+ "Requirement already satisfied: exceptiongroup>=1.2.2 in /usr/local/lib/python3.12/dist-packages (from fastmcp>=3.0.0->openenv-core) (1.3.1)\n",
118
+ "Requirement already satisfied: griffelib>=2.0.0 in /usr/local/lib/python3.12/dist-packages (from fastmcp>=3.0.0->openenv-core) (2.0.2)\n",
119
+ "Requirement already satisfied: jsonref>=1.1.0 in /usr/local/lib/python3.12/dist-packages (from fastmcp>=3.0.0->openenv-core) (1.1.0)\n",
120
+ "Requirement already satisfied: jsonschema-path>=0.3.4 in /usr/local/lib/python3.12/dist-packages (from fastmcp>=3.0.0->openenv-core) (0.4.5)\n",
121
+ "Requirement already satisfied: mcp<2.0,>=1.24.0 in /usr/local/lib/python3.12/dist-packages (from fastmcp>=3.0.0->openenv-core) (1.27.0)\n",
122
+ "Requirement already satisfied: openapi-pydantic>=0.5.1 in /usr/local/lib/python3.12/dist-packages (from fastmcp>=3.0.0->openenv-core) (0.5.1)\n",
123
+ "Requirement already satisfied: opentelemetry-api>=1.20.0 in /usr/local/lib/python3.12/dist-packages (from fastmcp>=3.0.0->openenv-core) (1.38.0)\n",
124
+ "Requirement already satisfied: platformdirs>=4.0.0 in /usr/local/lib/python3.12/dist-packages (from fastmcp>=3.0.0->openenv-core) (4.9.6)\n",
125
+ "Requirement already satisfied: py-key-value-aio<0.5.0,>=0.4.4 in /usr/local/lib/python3.12/dist-packages (from py-key-value-aio[filetree,keyring,memory]<0.5.0,>=0.4.4->fastmcp>=3.0.0->openenv-core) (0.4.4)\n",
126
+ "Requirement already satisfied: pyperclip>=1.9.0 in /usr/local/lib/python3.12/dist-packages (from fastmcp>=3.0.0->openenv-core) (1.11.0)\n",
127
+ "Requirement already satisfied: python-dotenv>=1.1.0 in /usr/local/lib/python3.12/dist-packages (from fastmcp>=3.0.0->openenv-core) (1.2.2)\n",
128
+ "Requirement already satisfied: uncalled-for>=0.2.0 in /usr/local/lib/python3.12/dist-packages (from fastmcp>=3.0.0->openenv-core) (0.3.1)\n",
129
+ "Requirement already satisfied: watchfiles>=1.0.0 in /usr/local/lib/python3.12/dist-packages (from fastmcp>=3.0.0->openenv-core) (1.1.1)\n",
130
+ "Requirement already satisfied: httpx-sse>=0.4 in /usr/local/lib/python3.12/dist-packages (from mcp<2.0,>=1.24.0->fastmcp>=3.0.0->openenv-core) (0.4.3)\n",
131
+ "Requirement already satisfied: jsonschema>=4.20.0 in /usr/local/lib/python3.12/dist-packages (from mcp<2.0,>=1.24.0->fastmcp>=3.0.0->openenv-core) (4.26.0)\n",
132
+ "Requirement already satisfied: pydantic-settings>=2.5.2 in /usr/local/lib/python3.12/dist-packages (from mcp<2.0,>=1.24.0->fastmcp>=3.0.0->openenv-core) (2.13.1)\n",
133
+ "Requirement already satisfied: pyjwt>=2.10.1 in /usr/local/lib/python3.12/dist-packages (from pyjwt[crypto]>=2.10.1->mcp<2.0,>=1.24.0->fastmcp>=3.0.0->openenv-core) (2.12.1)\n",
134
+ "Requirement already satisfied: python-multipart>=0.0.9 in /usr/local/lib/python3.12/dist-packages (from mcp<2.0,>=1.24.0->fastmcp>=3.0.0->openenv-core) (0.0.26)\n",
135
+ "Requirement already satisfied: sse-starlette>=1.6.1 in /usr/local/lib/python3.12/dist-packages (from mcp<2.0,>=1.24.0->fastmcp>=3.0.0->openenv-core) (3.3.4)\n",
136
+ "Requirement already satisfied: beartype>=0.20.0 in /usr/local/lib/python3.12/dist-packages (from py-key-value-aio<0.5.0,>=0.4.4->py-key-value-aio[filetree,keyring,memory]<0.5.0,>=0.4.4->fastmcp>=3.0.0->openenv-core) (0.22.9)\n",
137
+ "Requirement already satisfied: aiofile>=3.5.0 in /usr/local/lib/python3.12/dist-packages (from py-key-value-aio[filetree,keyring,memory]<0.5.0,>=0.4.4->fastmcp>=3.0.0->openenv-core) (3.9.0)\n",
138
+ "Requirement already satisfied: keyring>=25.6.0 in /usr/local/lib/python3.12/dist-packages (from py-key-value-aio[filetree,keyring,memory]<0.5.0,>=0.4.4->fastmcp>=3.0.0->openenv-core) (25.7.0)\n",
139
+ "Requirement already satisfied: cachetools>=5.0.0 in /usr/local/lib/python3.12/dist-packages (from py-key-value-aio[filetree,keyring,memory]<0.5.0,>=0.4.4->fastmcp>=3.0.0->openenv-core) (6.2.6)\n",
140
+ "Requirement already satisfied: caio<0.10.0,>=0.9.0 in /usr/local/lib/python3.12/dist-packages (from aiofile>=3.5.0->py-key-value-aio[filetree,keyring,memory]<0.5.0,>=0.4.4->fastmcp>=3.0.0->openenv-core) (0.9.25)\n",
141
+ "Requirement already satisfied: cryptography in /usr/local/lib/python3.12/dist-packages (from authlib>=1.6.5->fastmcp>=3.0.0->openenv-core) (43.0.3)\n",
142
+ "Requirement already satisfied: docstring-parser<4.0,>=0.15 in /usr/local/lib/python3.12/dist-packages (from cyclopts>=4.0.0->fastmcp>=3.0.0->openenv-core) (0.17.0)\n",
143
+ "Requirement already satisfied: rich-rst<2.0.0,>=1.3.1 in /usr/local/lib/python3.12/dist-packages (from cyclopts>=4.0.0->fastmcp>=3.0.0->openenv-core) (1.3.2)\n",
144
+ "Requirement already satisfied: docutils in /usr/local/lib/python3.12/dist-packages (from rich-rst<2.0.0,>=1.3.1->cyclopts>=4.0.0->fastmcp>=3.0.0->openenv-core) (0.21.2)\n",
145
+ "Requirement already satisfied: aiofiles<25.0,>=22.0 in /usr/local/lib/python3.12/dist-packages (from gradio>=4.0.0->openenv-core) (24.1.0)\n",
146
+ "Requirement already satisfied: brotli>=1.1.0 in /usr/local/lib/python3.12/dist-packages (from gradio>=4.0.0->openenv-core) (1.2.0)\n",
147
+ "Requirement already satisfied: ffmpy in /usr/local/lib/python3.12/dist-packages (from gradio>=4.0.0->openenv-core) (1.0.0)\n",
148
+ "Requirement already satisfied: gradio-client==1.14.0 in /usr/local/lib/python3.12/dist-packages (from gradio>=4.0.0->openenv-core) (1.14.0)\n",
149
+ "Requirement already satisfied: groovy~=0.1 in /usr/local/lib/python3.12/dist-packages (from gradio>=4.0.0->openenv-core) (0.1.2)\n",
150
+ "Requirement already satisfied: markupsafe<4.0,>=2.0 in /usr/local/lib/python3.12/dist-packages (from gradio>=4.0.0->openenv-core) (3.0.3)\n",
151
+ "Requirement already satisfied: orjson~=3.0 in /usr/local/lib/python3.12/dist-packages (from gradio>=4.0.0->openenv-core) (3.11.8)\n",
152
+ "Requirement already satisfied: pillow<12.0,>=8.0 in /usr/local/lib/python3.12/dist-packages (from gradio>=4.0.0->openenv-core) (11.3.0)\n",
153
+ "Requirement already satisfied: pydub in /usr/local/lib/python3.12/dist-packages (from gradio>=4.0.0->openenv-core) (0.25.1)\n",
154
+ "Requirement already satisfied: ruff>=0.9.3 in /usr/local/lib/python3.12/dist-packages (from gradio>=4.0.0->openenv-core) (0.15.10)\n",
155
+ "Requirement already satisfied: safehttpx<0.2.0,>=0.1.6 in /usr/local/lib/python3.12/dist-packages (from gradio>=4.0.0->openenv-core) (0.1.7)\n",
156
+ "Requirement already satisfied: semantic-version~=2.0 in /usr/local/lib/python3.12/dist-packages (from gradio>=4.0.0->openenv-core) (2.10.0)\n",
157
+ "Requirement already satisfied: tomlkit<0.14.0,>=0.12.0 in /usr/local/lib/python3.12/dist-packages (from gradio>=4.0.0->openenv-core) (0.13.3)\n",
158
+ "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.12/dist-packages (from pandas->datasets) (2.9.0.post0)\n",
159
+ "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.12/dist-packages (from pandas->datasets) (2025.2)\n",
160
+ "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.12/dist-packages (from pandas->datasets) (2026.1)\n",
161
+ "Requirement already satisfied: shellingham>=1.3.0 in /usr/local/lib/python3.12/dist-packages (from typer>=0.9.0->openenv-core) (1.5.4)\n",
162
+ "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.12/dist-packages (from jsonschema>=4.20.0->mcp<2.0,>=1.24.0->fastmcp>=3.0.0->openenv-core) (2025.9.1)\n",
163
+ "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.12/dist-packages (from jsonschema>=4.20.0->mcp<2.0,>=1.24.0->fastmcp>=3.0.0->openenv-core) (0.37.0)\n",
164
+ "Requirement already satisfied: rpds-py>=0.25.0 in /usr/local/lib/python3.12/dist-packages (from jsonschema>=4.20.0->mcp<2.0,>=1.24.0->fastmcp>=3.0.0->openenv-core) (0.30.0)\n",
165
+ "Requirement already satisfied: pathable<0.6.0,>=0.5.0 in /usr/local/lib/python3.12/dist-packages (from jsonschema-path>=0.3.4->fastmcp>=3.0.0->openenv-core) (0.5.0)\n",
166
+ "Requirement already satisfied: SecretStorage>=3.2 in /usr/local/lib/python3.12/dist-packages (from keyring>=25.6.0->py-key-value-aio[filetree,keyring,memory]<0.5.0,>=0.4.4->fastmcp>=3.0.0->openenv-core) (3.5.0)\n",
167
+ "Requirement already satisfied: jeepney>=0.4.2 in /usr/local/lib/python3.12/dist-packages (from keyring>=25.6.0->py-key-value-aio[filetree,keyring,memory]<0.5.0,>=0.4.4->fastmcp>=3.0.0->openenv-core) (0.9.0)\n",
168
+ "Requirement already satisfied: jaraco.classes in /usr/local/lib/python3.12/dist-packages (from keyring>=25.6.0->py-key-value-aio[filetree,keyring,memory]<0.5.0,>=0.4.4->fastmcp>=3.0.0->openenv-core) (3.4.0)\n",
169
+ "Requirement already satisfied: jaraco.functools in /usr/local/lib/python3.12/dist-packages (from keyring>=25.6.0->py-key-value-aio[filetree,keyring,memory]<0.5.0,>=0.4.4->fastmcp>=3.0.0->openenv-core) (4.4.0)\n",
170
+ "Requirement already satisfied: jaraco.context in /usr/local/lib/python3.12/dist-packages (from keyring>=25.6.0->py-key-value-aio[filetree,keyring,memory]<0.5.0,>=0.4.4->fastmcp>=3.0.0->openenv-core) (6.1.2)\n",
171
+ "Requirement already satisfied: distro<2,>=1.7.0 in /usr/local/lib/python3.12/dist-packages (from openai>=2.7.2->openenv-core) (1.9.0)\n",
172
+ "Requirement already satisfied: jiter<1,>=0.10.0 in /usr/local/lib/python3.12/dist-packages (from openai>=2.7.2->openenv-core) (0.14.0)\n",
173
+ "Requirement already satisfied: sniffio in /usr/local/lib/python3.12/dist-packages (from openai>=2.7.2->openenv-core) (1.3.1)\n",
174
+ "Requirement already satisfied: importlib-metadata<8.8.0,>=6.0 in /usr/local/lib/python3.12/dist-packages (from opentelemetry-api>=1.20.0->fastmcp>=3.0.0->openenv-core) (8.7.1)\n",
175
+ "Requirement already satisfied: zipp>=3.20 in /usr/local/lib/python3.12/dist-packages (from importlib-metadata<8.8.0,>=6.0->opentelemetry-api>=1.20.0->fastmcp>=3.0.0->openenv-core) (3.23.0)\n",
176
+ "Requirement already satisfied: email-validator>=2.0.0 in /usr/local/lib/python3.12/dist-packages (from pydantic[email]>=2.11.7->fastmcp>=3.0.0->openenv-core) (2.3.0)\n",
177
+ "Requirement already satisfied: dnspython>=2.0.0 in /usr/local/lib/python3.12/dist-packages (from email-validator>=2.0.0->pydantic[email]>=2.11.7->fastmcp>=3.0.0->openenv-core) (2.8.0)\n",
178
+ "Requirement already satisfied: cffi>=1.12 in /usr/local/lib/python3.12/dist-packages (from cryptography->authlib>=1.6.5->fastmcp>=3.0.0->openenv-core) (2.0.0)\n",
179
+ "Requirement already satisfied: pycparser in /usr/local/lib/python3.12/dist-packages (from cffi>=1.12->cryptography->authlib>=1.6.5->fastmcp>=3.0.0->openenv-core) (3.0)\n",
180
+ "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.12/dist-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.17.0)\n",
181
+ "Requirement already satisfied: charset_normalizer<4,>=2 in /usr/local/lib/python3.12/dist-packages (from requests->transformers) (3.4.7)\n",
182
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/dist-packages (from requests->transformers) (2.5.0)\n",
183
+ "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.12/dist-packages (from rich>=13.0.0->openenv-core) (4.0.0)\n",
184
+ "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.12/dist-packages (from rich>=13.0.0->openenv-core) (2.20.0)\n",
185
+ "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.12/dist-packages (from markdown-it-py>=2.2.0->rich>=13.0.0->openenv-core) (0.1.2)\n",
186
+ "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.12/dist-packages (from sympy>=1.13.3->torch) (1.3.0)\n",
187
+ "Requirement already satisfied: torchao>=0.13.0 in /usr/local/lib/python3.12/dist-packages (from unsloth_zoo>=2025.11.1->unsloth) (0.17.0)\n",
188
+ "Requirement already satisfied: cut_cross_entropy in /usr/local/lib/python3.12/dist-packages (from unsloth_zoo>=2025.11.1->unsloth) (25.1.1)\n",
189
+ "Requirement already satisfied: msgspec in /usr/local/lib/python3.12/dist-packages (from unsloth_zoo>=2025.11.1->unsloth) (0.21.1)\n",
190
+ "Requirement already satisfied: more-itertools in /usr/local/lib/python3.12/dist-packages (from jaraco.classes->keyring>=25.6.0->py-key-value-aio[filetree,keyring,memory]<0.5.0,>=0.4.4->fastmcp>=3.0.0->openenv-core) (10.8.0)\n",
191
+ "Requirement already satisfied: typeguard>=4.0.0 in /usr/local/lib/python3.12/dist-packages (from tyro->unsloth) (4.5.1)\n"
192
+ ]
193
+ }
194
+ ],
195
  "source": [
196
  "!pip install -U pip\n",
197
  "!pip uninstall -y openenv || true\n",
198
  "!pip install fastapi uvicorn pydantic httpx torch transformers trl unsloth openenv-core datasets pyarrow"
199
+ ]
 
 
200
  },
201
  {
202
  "cell_type": "code",
203
+ "execution_count": null,
204
  "metadata": {},
205
+ "outputs": [],
206
  "source": [
207
  "# If the repo is not already present, clone it.\n",
208
  "# !git clone https://github.com/<your-org-or-user>/salespath_env.git\n",
 
211
  "%cd /content/salespath_env\n",
212
  "!pip install -e .\n",
213
  "!python -c \"import salespath_env; print('salespath_env import OK')\""
214
+ ]
 
 
215
  },
216
  {
217
  "cell_type": "code",
218
+ "execution_count": null,
219
  "metadata": {},
220
+ "outputs": [],
221
  "source": [
222
  "# Verify OpenEnv core import first.\n",
223
  "!python -c \"import openenv.core; print('openenv.core import OK')\"\n",
 
226
  "!nohup python -m uvicorn salespath_env.server.app:app --host 0.0.0.0 --port 8000 > /content/server.log 2>&1 &\n",
227
  "!sleep 3\n",
228
  "!python -c \"import httpx; r=httpx.get('http://127.0.0.1:8000/health', timeout=30); print(r.status_code, r.text)\""
229
+ ]
 
 
230
  },
231
  {
232
  "cell_type": "code",
233
+ "execution_count": null,
234
  "metadata": {},
235
+ "outputs": [],
236
  "source": [
237
  "# Rollout smoke test (single episode)\n",
238
  "!python -m training.test_rollout"
239
+ ]
 
 
240
  },
241
  {
242
  "cell_type": "code",
243
+ "execution_count": null,
244
  "metadata": {},
245
+ "outputs": [],
246
  "source": [
247
  "# Curriculum run (example)\n",
248
  "!python -m training.grpo_train --steps 30 --env-url http://127.0.0.1:8000 --model-name Qwen/Qwen2.5-0.5B-Instruct"
249
+ ]
 
 
250
  },
251
  {
252
  "cell_type": "markdown",
 
271
  ],
272
  "metadata": {
273
  "kernelspec": {
274
+ "display_name": "Python 3 (ipykernel)",
275
  "language": "python",
276
  "name": "python3"
277
  },
278
  "language_info": {
279
+ "codemirror_mode": {
280
+ "name": "ipython",
281
+ "version": 3
282
+ },
283
+ "file_extension": ".py",
284
+ "mimetype": "text/x-python",
285
+ "name": "python",
286
+ "nbconvert_exporter": "python",
287
+ "pygments_lexer": "ipython3",
288
+ "version": "3.12.13"
289
  }
290
  },
291
  "nbformat": 4,
292
  "nbformat_minor": 5
293
+ }
training/grpo_train.py CHANGED
@@ -77,6 +77,21 @@ def _load_model_and_tokenizer(model_name: str, use_unsloth: bool = False):
77
  torch_dtype=torch.bfloat16 if bf16_supported else torch.float32,
78
  device_map="auto",
79
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  return model, tokenizer
81
 
82
 
 
77
  torch_dtype=torch.bfloat16 if bf16_supported else torch.float32,
78
  device_map="auto",
79
  )
80
+
81
+ try:
82
+ from peft import LoraConfig, get_peft_model
83
+ print("Applying standard PEFT (LoRA) adapters...")
84
+ peft_config = LoraConfig(
85
+ r=16,
86
+ lora_alpha=16,
87
+ target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
88
+ bias="none",
89
+ task_type="CAUSAL_LM",
90
+ )
91
+ model = get_peft_model(model, peft_config)
92
+ except ImportError:
93
+ print("Warning: PEFT not found. Proceeding with full parameter tuning (May OOM).")
94
+
95
  return model, tokenizer
96
 
97
 
training/train7b.ipynb ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "8a5758a2",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "# ============================================================\n",
11
+ "# CELL 1 — Install + Clone\n",
12
+ "# ============================================================\n",
13
+ "import os, sys, subprocess, time\n",
14
+ "from pathlib import Path\n",
15
+ "\n",
16
+ "# ---------- CONFIG ----------\n",
17
+ "REPO_URL = \"https://github.com/Imsachin010/salespath_env.git\"\n",
18
+ "MODEL_NAME = \"unsloth/Qwen2.5-7B-Instruct\" # 7B Model for final submission\n",
19
+ "ENV_URL = \"http://127.0.0.1:8000\"\n",
20
+ "OUTPUT_DIR = \"/content/salespath_out\"\n",
21
+ "# -----------------------------------------\n",
22
+ "\n",
23
+ "def run(cmd, check=True, cwd=None):\n",
24
+ " print(f\"\\n$ {cmd}\")\n",
25
+ " r = subprocess.run(cmd, shell=True, text=True, capture_output=True, cwd=cwd)\n",
26
+ " if r.stdout: print(r.stdout.strip())\n",
27
+ " if r.stderr: print(r.stderr.strip())\n",
28
+ " if check and r.returncode != 0:\n",
29
+ " raise RuntimeError(f\"Command failed ({r.returncode}): {cmd}\")\n",
30
+ " return r\n",
31
+ "\n",
32
+ "!nvidia-smi\n",
33
+ "print(\"Python:\", sys.version)\n",
34
+ "\n",
35
+ "# Install dependencies\n",
36
+ "!pip install -q -U pip\n",
37
+ "!pip uninstall -y openenv 2>/dev/null || true\n",
38
+ "!pip install -q fastapi uvicorn pydantic httpx openenv-core torch transformers trl unsloth datasets pyarrow huggingface_hub matplotlib\n",
39
+ "\n",
40
+ "# Clone repo\n",
41
+ "if not Path(\"/content/salespath_env\").exists():\n",
42
+ " run(f\"git clone {REPO_URL} /content/salespath_env\")\n",
43
+ "else:\n",
44
+ " print(\"Repo already cloned.\")\n",
45
+ "\n",
46
+ "REPO_ROOT = \"/content/salespath_env\"\n",
47
+ "os.chdir(REPO_ROOT)\n",
48
+ "print(\"Working dir:\", os.getcwd())\n",
49
+ "\n",
50
+ "# Install package in editable mode\n",
51
+ "run(\"pip install -q -e .\")\n",
52
+ "run(\"python -c \\\"import salespath_env; print('salespath_env import OK')\\\"\")\n",
53
+ "run(\"python -c \\\"import openenv.core; print('openenv.core import OK')\\\"\")\n",
54
+ "\n",
55
+ "# HF Login\n",
56
+ "hf_token = os.environ.get(\"HF_TOKEN\")\n",
57
+ "if hf_token:\n",
58
+ " from huggingface_hub import login\n",
59
+ " login(token=hf_token)\n",
60
+ " print(\"HF login OK\")\n",
61
+ "else:\n",
62
+ " print(\"HF_TOKEN not set.\")\n",
63
+ "\n",
64
+ "print(\"\\n✅ Setup complete.\")"
65
+ ]
66
+ },
67
+ {
68
+ "cell_type": "code",
69
+ "execution_count": null,
70
+ "id": "48a62b2d",
71
+ "metadata": {},
72
+ "outputs": [],
73
+ "source": [
74
+ "# Pull the fix we just pushed\n",
75
+ "!git pull origin main"
76
+ ]
77
+ },
78
+ {
79
+ "cell_type": "code",
80
+ "execution_count": null,
81
+ "id": "82e04114",
82
+ "metadata": {},
83
+ "outputs": [],
84
+ "source": []
85
+ },
86
+ {
87
+ "cell_type": "code",
88
+ "execution_count": null,
89
+ "id": "97e47c56",
90
+ "metadata": {},
91
+ "outputs": [],
92
+ "source": [
93
+ "\n",
94
+ "!PYTORCH_ALLOC_CONF=expandable_segments:True \\\n",
95
+ "python -m training.grpo_train \\\n",
96
+ " --mode grpo \\\n",
97
+ " --model-name unsloth/Qwen2.5-7B-Instruct \\\n",
98
+ " --grpo-steps 150 \\\n",
99
+ " --grpo-dataset-size 128 \\\n",
100
+ " --num-generations 2 \\\n",
101
+ " --max-completion-length 128 \\\n",
102
+ " --per-device-train-batch-size 2 \\\n",
103
+ " --gradient-accumulation-steps 8 \\\n",
104
+ " --output-dir /content/salespath_out \\\n",
105
+ " --logging-steps 10\n"
106
+ ]
107
+ }
108
+ ],
109
+ "metadata": {
110
+ "language_info": {
111
+ "name": "python"
112
+ }
113
+ },
114
+ "nbformat": 4,
115
+ "nbformat_minor": 5
116
+ }
training/traingrpo.ipynb CHANGED
The diff for this file is too large to render. See raw diff