Spaces:
Paused
Paused
Commit ·
3e5148a
1
Parent(s): 383294c
Track PNGs with LFS
Browse files- training/train_grpo.ipynb +17 -9
training/train_grpo.ipynb
CHANGED
|
@@ -54,7 +54,8 @@
|
|
| 54 |
"\n",
|
| 55 |
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m25.3\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m26.0.1\u001b[0m\n",
|
| 56 |
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
|
| 57 |
-
]
|
|
|
|
| 58 |
}
|
| 59 |
]
|
| 60 |
},
|
|
@@ -148,7 +149,8 @@
|
|
| 148 |
"Branch: hack1\n",
|
| 149 |
"Commit: aedc9c7\n",
|
| 150 |
"Plots dir: /Users/anurag.c/viral-posts-env/plots\n"
|
| 151 |
-
]
|
|
|
|
| 152 |
}
|
| 153 |
]
|
| 154 |
},
|
|
@@ -216,14 +218,16 @@
|
|
| 216 |
"text": [
|
| 217 |
"/Users/anurag.c/viral-posts-env/.venv/lib/python3.14/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
| 218 |
" from .autonotebook import tqdm as notebook_tqdm\n"
|
| 219 |
-
]
|
|
|
|
| 220 |
},
|
| 221 |
{
|
| 222 |
"output_type": "stream",
|
| 223 |
"text": [
|
| 224 |
"GPU: CPU\n",
|
| 225 |
"Tags: 114, Topics: 100, Horizon: 30 days\n"
|
| 226 |
-
]
|
|
|
|
| 227 |
}
|
| 228 |
]
|
| 229 |
},
|
|
@@ -322,7 +326,8 @@
|
|
| 322 |
"output_type": "stream",
|
| 323 |
"text": [
|
| 324 |
"Agents and episode runner defined.\n"
|
| 325 |
-
]
|
|
|
|
| 326 |
}
|
| 327 |
]
|
| 328 |
},
|
|
@@ -400,7 +405,8 @@
|
|
| 400 |
"random 0.5389 0.6403 0.6678 0.6157\n",
|
| 401 |
"minimal 0.4145 0.7220 0.3850 0.5072\n",
|
| 402 |
"smart 0.7883 0.8932 0.8986 0.8600\n"
|
| 403 |
-
]
|
|
|
|
| 404 |
}
|
| 405 |
]
|
| 406 |
},
|
|
@@ -434,7 +440,8 @@
|
|
| 434 |
"text/plain": [
|
| 435 |
"<Figure size 1600x500 with 3 Axes>"
|
| 436 |
]
|
| 437 |
-
}
|
|
|
|
| 438 |
}
|
| 439 |
]
|
| 440 |
},
|
|
@@ -523,7 +530,8 @@
|
|
| 523 |
"Loading Qwen/Qwen2.5-1.5B-Instruct without 4-bit (bitsandbytes/CUDA unavailable).\n",
|
| 524 |
" On Colab: run `pip install -U bitsandbytes>=0.46.1` and use a GPU runtime.\n",
|
| 525 |
" On Mac: use fp16 on MPS or fp32 on CPU.\n"
|
| 526 |
-
]
|
|
|
|
| 527 |
},
|
| 528 |
{
|
| 529 |
"output_type": "error",
|
|
@@ -1077,4 +1085,4 @@
|
|
| 1077 |
},
|
| 1078 |
"nbformat": 4,
|
| 1079 |
"nbformat_minor": 4
|
| 1080 |
-
}
|
|
|
|
| 54 |
"\n",
|
| 55 |
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m25.3\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m26.0.1\u001b[0m\n",
|
| 56 |
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
|
| 57 |
+
],
|
| 58 |
+
"name": "stdout"
|
| 59 |
}
|
| 60 |
]
|
| 61 |
},
|
|
|
|
| 149 |
"Branch: hack1\n",
|
| 150 |
"Commit: aedc9c7\n",
|
| 151 |
"Plots dir: /Users/anurag.c/viral-posts-env/plots\n"
|
| 152 |
+
],
|
| 153 |
+
"name": "stdout"
|
| 154 |
}
|
| 155 |
]
|
| 156 |
},
|
|
|
|
| 218 |
"text": [
|
| 219 |
"/Users/anurag.c/viral-posts-env/.venv/lib/python3.14/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
| 220 |
" from .autonotebook import tqdm as notebook_tqdm\n"
|
| 221 |
+
],
|
| 222 |
+
"name": "stdout"
|
| 223 |
},
|
| 224 |
{
|
| 225 |
"output_type": "stream",
|
| 226 |
"text": [
|
| 227 |
"GPU: CPU\n",
|
| 228 |
"Tags: 114, Topics: 100, Horizon: 30 days\n"
|
| 229 |
+
],
|
| 230 |
+
"name": "stdout"
|
| 231 |
}
|
| 232 |
]
|
| 233 |
},
|
|
|
|
| 326 |
"output_type": "stream",
|
| 327 |
"text": [
|
| 328 |
"Agents and episode runner defined.\n"
|
| 329 |
+
],
|
| 330 |
+
"name": "stdout"
|
| 331 |
}
|
| 332 |
]
|
| 333 |
},
|
|
|
|
| 405 |
"random 0.5389 0.6403 0.6678 0.6157\n",
|
| 406 |
"minimal 0.4145 0.7220 0.3850 0.5072\n",
|
| 407 |
"smart 0.7883 0.8932 0.8986 0.8600\n"
|
| 408 |
+
],
|
| 409 |
+
"name": "stdout"
|
| 410 |
}
|
| 411 |
]
|
| 412 |
},
|
|
|
|
| 440 |
"text/plain": [
|
| 441 |
"<Figure size 1600x500 with 3 Axes>"
|
| 442 |
]
|
| 443 |
+
},
|
| 444 |
+
"metadata": {}
|
| 445 |
}
|
| 446 |
]
|
| 447 |
},
|
|
|
|
| 530 |
"Loading Qwen/Qwen2.5-1.5B-Instruct without 4-bit (bitsandbytes/CUDA unavailable).\n",
|
| 531 |
" On Colab: run `pip install -U bitsandbytes>=0.46.1` and use a GPU runtime.\n",
|
| 532 |
" On Mac: use fp16 on MPS or fp32 on CPU.\n"
|
| 533 |
+
],
|
| 534 |
+
"name": "stdout"
|
| 535 |
},
|
| 536 |
{
|
| 537 |
"output_type": "error",
|
|
|
|
| 1085 |
},
|
| 1086 |
"nbformat": 4,
|
| 1087 |
"nbformat_minor": 4
|
| 1088 |
+
}
|