vaibhav12332112312 commited on
Commit
3e5148a
·
1 Parent(s): 383294c

Track PNGs with LFS

Browse files
Files changed (1) hide show
  1. training/train_grpo.ipynb +17 -9
training/train_grpo.ipynb CHANGED
@@ -54,7 +54,8 @@
54
  "\n",
55
  "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m25.3\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m26.0.1\u001b[0m\n",
56
  "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
57
- ]
 
58
  }
59
  ]
60
  },
@@ -148,7 +149,8 @@
148
  "Branch: hack1\n",
149
  "Commit: aedc9c7\n",
150
  "Plots dir: /Users/anurag.c/viral-posts-env/plots\n"
151
- ]
 
152
  }
153
  ]
154
  },
@@ -216,14 +218,16 @@
216
  "text": [
217
  "/Users/anurag.c/viral-posts-env/.venv/lib/python3.14/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
218
  " from .autonotebook import tqdm as notebook_tqdm\n"
219
- ]
 
220
  },
221
  {
222
  "output_type": "stream",
223
  "text": [
224
  "GPU: CPU\n",
225
  "Tags: 114, Topics: 100, Horizon: 30 days\n"
226
- ]
 
227
  }
228
  ]
229
  },
@@ -322,7 +326,8 @@
322
  "output_type": "stream",
323
  "text": [
324
  "Agents and episode runner defined.\n"
325
- ]
 
326
  }
327
  ]
328
  },
@@ -400,7 +405,8 @@
400
  "random 0.5389 0.6403 0.6678 0.6157\n",
401
  "minimal 0.4145 0.7220 0.3850 0.5072\n",
402
  "smart 0.7883 0.8932 0.8986 0.8600\n"
403
- ]
 
404
  }
405
  ]
406
  },
@@ -434,7 +440,8 @@
434
  "text/plain": [
435
  "<Figure size 1600x500 with 3 Axes>"
436
  ]
437
- }
 
438
  }
439
  ]
440
  },
@@ -523,7 +530,8 @@
523
  "Loading Qwen/Qwen2.5-1.5B-Instruct without 4-bit (bitsandbytes/CUDA unavailable).\n",
524
  " On Colab: run `pip install -U bitsandbytes>=0.46.1` and use a GPU runtime.\n",
525
  " On Mac: use fp16 on MPS or fp32 on CPU.\n"
526
- ]
 
527
  },
528
  {
529
  "output_type": "error",
@@ -1077,4 +1085,4 @@
1077
  },
1078
  "nbformat": 4,
1079
  "nbformat_minor": 4
1080
- }
 
54
  "\n",
55
  "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m25.3\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m26.0.1\u001b[0m\n",
56
  "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
57
+ ],
58
+ "name": "stdout"
59
  }
60
  ]
61
  },
 
149
  "Branch: hack1\n",
150
  "Commit: aedc9c7\n",
151
  "Plots dir: /Users/anurag.c/viral-posts-env/plots\n"
152
+ ],
153
+ "name": "stdout"
154
  }
155
  ]
156
  },
 
218
  "text": [
219
  "/Users/anurag.c/viral-posts-env/.venv/lib/python3.14/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
220
  " from .autonotebook import tqdm as notebook_tqdm\n"
221
+ ],
222
+ "name": "stdout"
223
  },
224
  {
225
  "output_type": "stream",
226
  "text": [
227
  "GPU: CPU\n",
228
  "Tags: 114, Topics: 100, Horizon: 30 days\n"
229
+ ],
230
+ "name": "stdout"
231
  }
232
  ]
233
  },
 
326
  "output_type": "stream",
327
  "text": [
328
  "Agents and episode runner defined.\n"
329
+ ],
330
+ "name": "stdout"
331
  }
332
  ]
333
  },
 
405
  "random 0.5389 0.6403 0.6678 0.6157\n",
406
  "minimal 0.4145 0.7220 0.3850 0.5072\n",
407
  "smart 0.7883 0.8932 0.8986 0.8600\n"
408
+ ],
409
+ "name": "stdout"
410
  }
411
  ]
412
  },
 
440
  "text/plain": [
441
  "<Figure size 1600x500 with 3 Axes>"
442
  ]
443
+ },
444
+ "metadata": {}
445
  }
446
  ]
447
  },
 
530
  "Loading Qwen/Qwen2.5-1.5B-Instruct without 4-bit (bitsandbytes/CUDA unavailable).\n",
531
  " On Colab: run `pip install -U bitsandbytes>=0.46.1` and use a GPU runtime.\n",
532
  " On Mac: use fp16 on MPS or fp32 on CPU.\n"
533
+ ],
534
+ "name": "stdout"
535
  },
536
  {
537
  "output_type": "error",
 
1085
  },
1086
  "nbformat": 4,
1087
  "nbformat_minor": 4
1088
+ }