Spaces:
Running
Running
Upload LogTriageEnv_Training.ipynb
Browse files- LogTriageEnv_Training.ipynb +50 -9
LogTriageEnv_Training.ipynb
CHANGED
|
@@ -180,12 +180,47 @@
|
|
| 180 |
"source": [
|
| 181 |
"import subprocess\n",
|
| 182 |
"import os\n",
|
|
|
|
| 183 |
"\n",
|
| 184 |
-
"
|
| 185 |
-
"
|
| 186 |
-
"
|
| 187 |
-
"
|
| 188 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 189 |
" print(\"\\n\" + \"=\"*60)\n",
|
| 190 |
" print(\"[START] LogTriageEnv Training\")\n",
|
| 191 |
" print(\"=\"*60)\n",
|
|
@@ -196,7 +231,7 @@
|
|
| 196 |
" print()\n",
|
| 197 |
"\n",
|
| 198 |
" cmd = [\n",
|
| 199 |
-
" \"python\",
|
| 200 |
" \"--model\", model_id,\n",
|
| 201 |
" \"--task\", \"all\",\n",
|
| 202 |
" \"--episodes\", \"50\",\n",
|
|
@@ -216,7 +251,11 @@
|
|
| 216 |
" except subprocess.TimeoutExpired:\n",
|
| 217 |
" print(\"⚠ Training timed out after 30 minutes\")\n",
|
| 218 |
" except Exception as e:\n",
|
| 219 |
-
" print(f\"Error: {e}\")"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 220 |
]
|
| 221 |
},
|
| 222 |
{
|
|
@@ -343,7 +382,8 @@
|
|
| 343 |
" df = pd.read_csv(csv_file)\n",
|
| 344 |
" print(f\"[{files[0]}]\")\n",
|
| 345 |
" print(df.head(10).to_string())\n",
|
| 346 |
-
" print(f\"\\n✓ {len(df)} episodes tracked\")\
|
|
|
|
| 347 |
" print(f\"⚠ Log directory not found: {csv_dir}\")\n",
|
| 348 |
" print(\"CSV logs are generated during training\")"
|
| 349 |
]
|
|
@@ -384,7 +424,8 @@
|
|
| 384 |
" !zip -r {f}.zip {f}\n",
|
| 385 |
" files.download(f\"{f}.zip\")\n",
|
| 386 |
" print(f\"✓ {f} ready\")\n",
|
| 387 |
-
" \
|
|
|
|
| 388 |
" print(\"[INFO] Not in Colab environment\")\n",
|
| 389 |
" print(\"Files saved locally:\")\n",
|
| 390 |
" !ls -lh reward_curve.png logtriage-trained/ phase2_checkpoints/ logs/ 2>/dev/null || echo \"Check current directory\""
|
|
|
|
| 180 |
"source": [
|
| 181 |
"import subprocess\n",
|
| 182 |
"import os\n",
|
| 183 |
+
"import shutil\n",
|
| 184 |
"\n",
|
| 185 |
+
"print(\"\\n\" + \"=\"*60)\n",
|
| 186 |
+
"print(\"[STEP 5A] Clone Repository from GitHub\")\n",
|
| 187 |
+
"print(\"=\"*60)\n",
|
| 188 |
+
"\n",
|
| 189 |
+
"# Clone the repository\n",
|
| 190 |
+
"repo_url = \"https://github.com/rohitdecodes/logtriage-env.git\"\n",
|
| 191 |
+
"repo_dir = \"logtriage-env\"\n",
|
| 192 |
+
"\n",
|
| 193 |
+
"# Remove existing repo if it exists\n",
|
| 194 |
+
"if os.path.exists(repo_dir):\n",
|
| 195 |
+
" print(f\"⚠ {repo_dir} already exists, removing...\")\n",
|
| 196 |
+
" shutil.rmtree(repo_dir)\n",
|
| 197 |
+
"\n",
|
| 198 |
+
"try:\n",
|
| 199 |
+
" print(f\"Cloning from {repo_url}...\")\n",
|
| 200 |
+
" result = subprocess.run(\n",
|
| 201 |
+
" [\"git\", \"clone\", repo_url, repo_dir],\n",
|
| 202 |
+
" capture_output=True,\n",
|
| 203 |
+
" text=True,\n",
|
| 204 |
+
" timeout=300\n",
|
| 205 |
+
" )\n",
|
| 206 |
+
"\n",
|
| 207 |
+
" if result.returncode == 0:\n",
|
| 208 |
+
" print(f\"✓ Repository cloned successfully\")\n",
|
| 209 |
+
" train_py_path = os.path.join(repo_dir, \"train.py\")\n",
|
| 210 |
+
" else:\n",
|
| 211 |
+
" print(f\"⚠ Clone failed: {result.stderr}\")\n",
|
| 212 |
+
" train_py_path = \"train.py\"\n",
|
| 213 |
+
"except Exception as e:\n",
|
| 214 |
+
" print(f\"⚠ Clone error: {e}\")\n",
|
| 215 |
+
" train_py_path = \"train.py\"\n",
|
| 216 |
+
"\n",
|
| 217 |
+
"print()\n",
|
| 218 |
+
"print(\"=\"*60)\n",
|
| 219 |
+
"print(\"[STEP 5B] Launch Training\")\n",
|
| 220 |
+
"print(\"=\"*60)\n",
|
| 221 |
+
"\n",
|
| 222 |
+
"# Check if train.py exists (either from clone or current directory)\n",
|
| 223 |
+
"if os.path.exists(train_py_path):\n",
|
| 224 |
" print(\"\\n\" + \"=\"*60)\n",
|
| 225 |
" print(\"[START] LogTriageEnv Training\")\n",
|
| 226 |
" print(\"=\"*60)\n",
|
|
|
|
| 231 |
" print()\n",
|
| 232 |
"\n",
|
| 233 |
" cmd = [\n",
|
| 234 |
+
" \"python\", train_py_path,\n",
|
| 235 |
" \"--model\", model_id,\n",
|
| 236 |
" \"--task\", \"all\",\n",
|
| 237 |
" \"--episodes\", \"50\",\n",
|
|
|
|
| 251 |
" except subprocess.TimeoutExpired:\n",
|
| 252 |
" print(\"⚠ Training timed out after 30 minutes\")\n",
|
| 253 |
" except Exception as e:\n",
|
| 254 |
+
" print(f\"Error: {e}\")\n",
|
| 255 |
+
"else:\n",
|
| 256 |
+
" print(f\"⚠ train.py not found at {train_py_path}\")\n",
|
| 257 |
+
" print(\"✗ TRAINING FAILED\")\n",
|
| 258 |
+
" print(\"Make sure the repository clone was successful or train.py exists in current directory\")"
|
| 259 |
]
|
| 260 |
},
|
| 261 |
{
|
|
|
|
| 382 |
" df = pd.read_csv(csv_file)\n",
|
| 383 |
" print(f\"[{files[0]}]\")\n",
|
| 384 |
" print(df.head(10).to_string())\n",
|
| 385 |
+
" print(f\"\\n✓ {len(df)} episodes tracked\")\n",
|
| 386 |
+
"else:\n",
|
| 387 |
" print(f\"⚠ Log directory not found: {csv_dir}\")\n",
|
| 388 |
" print(\"CSV logs are generated during training\")"
|
| 389 |
]
|
|
|
|
| 424 |
" !zip -r {f}.zip {f}\n",
|
| 425 |
" files.download(f\"{f}.zip\")\n",
|
| 426 |
" print(f\"✓ {f} ready\")\n",
|
| 427 |
+
" \n",
|
| 428 |
+
"except ImportError:\n",
|
| 429 |
" print(\"[INFO] Not in Colab environment\")\n",
|
| 430 |
" print(\"Files saved locally:\")\n",
|
| 431 |
" !ls -lh reward_curve.png logtriage-trained/ phase2_checkpoints/ logs/ 2>/dev/null || echo \"Check current directory\""
|