OGrohit committed on
Commit
e9691c1
·
verified ·
1 Parent(s): a1b4282

Upload LogTriageEnv_Training.ipynb

Browse files
Files changed (1) hide show
  1. LogTriageEnv_Training.ipynb +50 -9
LogTriageEnv_Training.ipynb CHANGED
@@ -180,12 +180,47 @@
180
  "source": [
181
  "import subprocess\n",
182
  "import os\n",
 
183
  "\n",
184
- "# Ensure train.py exists in current directory\n",
185
- "if not os.path.exists('train.py'):\n",
186
- " print(\"⚠ train.py not found in current directory\")\n",
187
- " print(\"Make sure to upload train.py to this notebook first\")\n",
188
- "else:\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  " print(\"\\n\" + \"=\"*60)\n",
190
  " print(\"[START] LogTriageEnv Training\")\n",
191
  " print(\"=\"*60)\n",
@@ -196,7 +231,7 @@
196
  " print()\n",
197
  "\n",
198
  " cmd = [\n",
199
- " \"python\", \"train.py\",\n",
200
  " \"--model\", model_id,\n",
201
  " \"--task\", \"all\",\n",
202
  " \"--episodes\", \"50\",\n",
@@ -216,7 +251,11 @@
216
  " except subprocess.TimeoutExpired:\n",
217
  " print(\"⚠ Training timed out after 30 minutes\")\n",
218
  " except Exception as e:\n",
219
- " print(f\"Error: {e}\")"
 
 
 
 
220
  ]
221
  },
222
  {
@@ -343,7 +382,8 @@
343
  " df = pd.read_csv(csv_file)\n",
344
  " print(f\"[{files[0]}]\")\n",
345
  " print(df.head(10).to_string())\n",
346
- " print(f\"\\n✓ {len(df)} episodes tracked\")\nelse:\n",
 
347
  " print(f\"⚠ Log directory not found: {csv_dir}\")\n",
348
  " print(\"CSV logs are generated during training\")"
349
  ]
@@ -384,7 +424,8 @@
384
  " !zip -r {f}.zip {f}\n",
385
  " files.download(f\"{f}.zip\")\n",
386
  " print(f\"✓ {f} ready\")\n",
387
- " \nexcept ImportError:\n",
 
388
  " print(\"[INFO] Not in Colab environment\")\n",
389
  " print(\"Files saved locally:\")\n",
390
  " !ls -lh reward_curve.png logtriage-trained/ phase2_checkpoints/ logs/ 2>/dev/null || echo \"Check current directory\""
 
180
  "source": [
181
  "import subprocess\n",
182
  "import os\n",
183
+ "import shutil\n",
184
  "\n",
185
+ "print(\"\\n\" + \"=\"*60)\n",
186
+ "print(\"[STEP 5A] Clone Repository from GitHub\")\n",
187
+ "print(\"=\"*60)\n",
188
+ "\n",
189
+ "# Clone the repository\n",
190
+ "repo_url = \"https://github.com/rohitdecodes/logtriage-env.git\"\n",
191
+ "repo_dir = \"logtriage-env\"\n",
192
+ "\n",
193
+ "# Remove existing repo if it exists\n",
194
+ "if os.path.exists(repo_dir):\n",
195
+ " print(f\"⚠ {repo_dir} already exists, removing...\")\n",
196
+ " shutil.rmtree(repo_dir)\n",
197
+ "\n",
198
+ "try:\n",
199
+ " print(f\"Cloning from {repo_url}...\")\n",
200
+ " result = subprocess.run(\n",
201
+ " [\"git\", \"clone\", repo_url, repo_dir],\n",
202
+ " capture_output=True,\n",
203
+ " text=True,\n",
204
+ " timeout=300\n",
205
+ " )\n",
206
+ "\n",
207
+ " if result.returncode == 0:\n",
208
+ " print(f\"✓ Repository cloned successfully\")\n",
209
+ " train_py_path = os.path.join(repo_dir, \"train.py\")\n",
210
+ " else:\n",
211
+ " print(f\"⚠ Clone failed: {result.stderr}\")\n",
212
+ " train_py_path = \"train.py\"\n",
213
+ "except Exception as e:\n",
214
+ " print(f\"⚠ Clone error: {e}\")\n",
215
+ " train_py_path = \"train.py\"\n",
216
+ "\n",
217
+ "print()\n",
218
+ "print(\"=\"*60)\n",
219
+ "print(\"[STEP 5B] Launch Training\")\n",
220
+ "print(\"=\"*60)\n",
221
+ "\n",
222
+ "# Check if train.py exists (either from clone or current directory)\n",
223
+ "if os.path.exists(train_py_path):\n",
224
  " print(\"\\n\" + \"=\"*60)\n",
225
  " print(\"[START] LogTriageEnv Training\")\n",
226
  " print(\"=\"*60)\n",
 
231
  " print()\n",
232
  "\n",
233
  " cmd = [\n",
234
+ " \"python\", train_py_path,\n",
235
  " \"--model\", model_id,\n",
236
  " \"--task\", \"all\",\n",
237
  " \"--episodes\", \"50\",\n",
 
251
  " except subprocess.TimeoutExpired:\n",
252
  " print(\"⚠ Training timed out after 30 minutes\")\n",
253
  " except Exception as e:\n",
254
+ " print(f\"Error: {e}\")\n",
255
+ "else:\n",
256
+ " print(f\"⚠ train.py not found at {train_py_path}\")\n",
257
+ " print(\"✗ TRAINING FAILED\")\n",
258
+ " print(\"Make sure the repository clone was successful or train.py exists in current directory\")"
259
  ]
260
  },
261
  {
 
382
  " df = pd.read_csv(csv_file)\n",
383
  " print(f\"[{files[0]}]\")\n",
384
  " print(df.head(10).to_string())\n",
385
+ " print(f\"\\n✓ {len(df)} episodes tracked\")\n",
386
+ "else:\n",
387
  " print(f\"⚠ Log directory not found: {csv_dir}\")\n",
388
  " print(\"CSV logs are generated during training\")"
389
  ]
 
424
  " !zip -r {f}.zip {f}\n",
425
  " files.download(f\"{f}.zip\")\n",
426
  " print(f\"✓ {f} ready\")\n",
427
+ " \n",
428
+ "except ImportError:\n",
429
  " print(\"[INFO] Not in Colab environment\")\n",
430
  " print(\"Files saved locally:\")\n",
431
  " !ls -lh reward_curve.png logtriage-trained/ phase2_checkpoints/ logs/ 2>/dev/null || echo \"Check current directory\""