{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "gpuType": "T4" }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "GPU", "widgets": { "application/vnd.jupyter.widget-state+json": { "efa20c94265a4e8aa5c2b42b30d8a0dc": { "model_module": "@jupyter-widgets/controls", "model_name": "VBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "VBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "VBoxView", "box_style": "", "children": [], "layout": "IPY_MODEL_07e8ea9a765b4bb38aa9486c08c7e876" } }, "c75a89d0eeb04c8c9563e536cf3617de": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_1254edd893ea4527840a08e998a42e66", "placeholder": "​", "style": "IPY_MODEL_e0dd533ff9d14edda59583096c3ed44c", "value": "

Copy a token from your Hugging Face\ntokens page and paste it below.
Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file.
" } }, "823e59efcb874d0f875829bdb729f5f1": { "model_module": "@jupyter-widgets/controls", "model_name": "PasswordModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "PasswordModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "PasswordView", "continuous_update": true, "description": "Token:", "description_tooltip": null, "disabled": false, "layout": "IPY_MODEL_37a76ab6f9fe46c68598ecde37caa9ff", "placeholder": "​", "style": "IPY_MODEL_62ae40b3ca12465485ca5469798b565b", "value": "" } }, "2df40cb11c5d41d6856d53cdbd57357b": { "model_module": "@jupyter-widgets/controls", "model_name": "CheckboxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "CheckboxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "CheckboxView", "description": "Add token as git credential?", "description_tooltip": null, "disabled": false, "indent": true, "layout": "IPY_MODEL_06e8ddba2c9a47e98ac550ea4e35d992", "style": "IPY_MODEL_972ce993ae0943e58e2acbd56bf0de7b", "value": true } }, "f06edce066f94a53a8b7111197c9d1ea": { "model_module": "@jupyter-widgets/controls", "model_name": "ButtonModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ButtonModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ButtonView", "button_style": "", "description": "Login", "disabled": false, "icon": "", "layout": "IPY_MODEL_b4ddb223ea4d4046908f06cf8f8c72ee", "style": "IPY_MODEL_210cdf5848f143fca9b77855e24de566", "tooltip": "" } }, "4d1b5e16c7484bb2b38691acc1fd372a": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_9619fd586f35410a9b3a18c8c44098dd", "placeholder": "​", "style": "IPY_MODEL_e06302bb607344ba8e1e931f5c55f1a4", "value": "\nPro Tip: If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. " } }, "07e8ea9a765b4bb38aa9486c08c7e876": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": "center", "align_self": null, "border": null, "bottom": null, "display": "flex", "flex": null, "flex_flow": "column", "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": "50%" } }, "1254edd893ea4527840a08e998a42e66": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "e0dd533ff9d14edda59583096c3ed44c": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "37a76ab6f9fe46c68598ecde37caa9ff": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "62ae40b3ca12465485ca5469798b565b": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "06e8ddba2c9a47e98ac550ea4e35d992": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "972ce993ae0943e58e2acbd56bf0de7b": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "b4ddb223ea4d4046908f06cf8f8c72ee": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "210cdf5848f143fca9b77855e24de566": { "model_module": "@jupyter-widgets/controls", "model_name": "ButtonStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ButtonStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "button_color": null, "font_weight": "" } }, "9619fd586f35410a9b3a18c8c44098dd": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "e06302bb607344ba8e1e931f5c55f1a4": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "694961c954f64e96979f260fc8072430": { "model_module": "@jupyter-widgets/controls", "model_name": "LabelModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "LabelModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "LabelView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_9b5cd082ede1494e9476b29935308eb2", "placeholder": "​", "style": "IPY_MODEL_14303e52186d40c38cb936202917dbec", "value": "Connecting..." } }, "9b5cd082ede1494e9476b29935308eb2": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "14303e52186d40c38cb936202917dbec": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } } } } }, "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "PhHOWGEzx02F", "outputId": "218d1fde-080b-455e-dc9a-3232320b6847" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Sun Apr 26 01:11:23 2026 \n", "+-----------------------------------------------------------------------------------------+\n", "| NVIDIA-SMI 580.82.07 Driver Version: 580.82.07 CUDA Version: 13.0 |\n", "+-----------------------------------------+------------------------+----------------------+\n", "| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n", "| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n", "| | | MIG M. |\n", "|=========================================+========================+======================|\n", "| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |\n", "| N/A 43C P8 9W / 70W | 0MiB / 15360MiB | 0% Default |\n", "| | | N/A |\n", "+-----------------------------------------+------------------------+----------------------+\n", "\n", "+-----------------------------------------------------------------------------------------+\n", "| Processes: |\n", "| GPU GI CI PID Type Process name GPU Memory |\n", "| ID ID Usage |\n", "|=========================================================================================|\n", "| No running processes found |\n", "+-----------------------------------------------------------------------------------------+\n" ] } ], "source": [ "!nvidia-smi" ] }, { "cell_type": "code", "source": [ " import torch\n", " print(f\"CUDA available: {torch.cuda.is_available()}\")\n", " if torch.cuda.is_available():\n", " print(f\"GPU: {torch.cuda.get_device_name(0)}\")\n", " props = torch.cuda.get_device_properties(0)\n", " print(f\"VRAM: {props.total_memory / 1e9:.1f} GB\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "fHa9jIEV1NhP", "outputId": "b7c9160f-f2ac-431d-f64f-f451cd1153aa" }, "execution_count": 2, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "CUDA available: True\n", "GPU: Tesla T4\n", "VRAM: 15.6 GB\n" ] } ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "id": "Gi8sLt91Y_PE", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "2b1c9027-ee85-4dd7-892c-79afc490e5b5" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/1.8 MB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.8/1.8 MB\u001b[0m \u001b[31m85.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h" ] } ], "source": [ "!pip install -q -U pip\n", "!pip install -q torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121\n", "!pip install -q bitsandbytes peft trl transformers datasets accelerate matplotlib requests huggingface_hub unsloth" ] }, { "cell_type": "code", "source": [ "from huggingface_hub import login\n", "login() # paste your HF token when prompted\n" ], "metadata": { "id": "tL1kUtkE3aB1", "colab": { "base_uri": "https://localhost:8080/", "height": 17, "referenced_widgets": [ "efa20c94265a4e8aa5c2b42b30d8a0dc", "c75a89d0eeb04c8c9563e536cf3617de", "823e59efcb874d0f875829bdb729f5f1", "2df40cb11c5d41d6856d53cdbd57357b", "f06edce066f94a53a8b7111197c9d1ea", "4d1b5e16c7484bb2b38691acc1fd372a", "07e8ea9a765b4bb38aa9486c08c7e876", "1254edd893ea4527840a08e998a42e66", "e0dd533ff9d14edda59583096c3ed44c", "37a76ab6f9fe46c68598ecde37caa9ff", "62ae40b3ca12465485ca5469798b565b", "06e8ddba2c9a47e98ac550ea4e35d992", "972ce993ae0943e58e2acbd56bf0de7b", "b4ddb223ea4d4046908f06cf8f8c72ee", "210cdf5848f143fca9b77855e24de566", "9619fd586f35410a9b3a18c8c44098dd", "e06302bb607344ba8e1e931f5c55f1a4", "694961c954f64e96979f260fc8072430", "9b5cd082ede1494e9476b29935308eb2", "14303e52186d40c38cb936202917dbec" ] }, "outputId": "f936cb56-604f-4013-8612-5d649afb97e2" }, "execution_count": 4, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "VBox(children=(HTML(value='
= 2.11.0 (found 2.10.0+cu128).\n", "/content/train.py:45: UserWarning: WARNING: Unsloth should be imported before trl, transformers, peft to ensure all optimizations are applied. Your code may run slower or encounter memory issues without these optimizations.\n", "\n", "Please restructure your imports with 'import unsloth' at the top of your file.\n", " from unsloth import FastLanguageModel\n", "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", "🦥 Unsloth Zoo will now patch everything to make training faster!\n", "Unable to import `torchao` Tensor objects. This may affect loading checkpoints serialized with `torchao`\n", "Flax classes are deprecated and will be removed in Diffusers v1.0.0. We recommend migrating to PyTorch classes or pinning your version of Diffusers.\n", "Flax classes are deprecated and will be removed in Diffusers v1.0.0. We recommend migrating to PyTorch classes or pinning your version of Diffusers.\n", "\n", "[LOGGING] LogTriageEnv GRPO Training\n", " Model: Qwen/Qwen2.5-3B-Instruct\n", " Task: all\n", " Episodes: 30\n", " Device: cuda\n", " Env URL: https://ogrohit-logtriage-env.hf.space\n", "\n", "[OK] Connected to LogTriageEnv at https://ogrohit-logtriage-env.hf.space\n", "[MODEL] Loading model: Qwen/Qwen2.5-3B-Instruct\n", "[QLoRA] Loading model with BitsAndBytes 4-bit...\n", "tokenizer_config.json: 7.30kB [00:00, 4.55MB/s]\n", "vocab.json: 2.78MB [00:00, 104MB/s]\n", "merges.txt: 1.67MB [00:00, 111MB/s]\n", "tokenizer.json: 7.03MB [00:00, 139MB/s]\n", "[OK] 4-bit BitsAndBytesConfig applied\n", "config.json: 100% 661/661 [00:00<00:00, 5.12MB/s]\n", "model.safetensors.index.json: 35.6kB [00:00, 101MB/s]\n", "Fetching 2 files: 0% 0/2 [00:00 ./phase2_checkpoints/single_crash_ep25.json\n", " Episode 25/30 | Reward: +0.150 | Steps: 4 | Rolling avg (10): 0.145\n", " Episode 26/30 | Reward: +0.100 | Steps: 6 | Rolling avg (10): 0.160\n", " Episode 27/30 | Reward: -0.050 | Steps: 6 | Rolling avg (10): 0.090\n", " Episode 28/30 | Reward: +0.000 | Steps: 8 | Rolling avg (10): 0.055\n", " Episode 29/30 | Reward: +0.100 | Steps: 6 | Rolling avg (10): 0.070\n", " Episode 30/30 | Reward: +0.100 | Steps: 6 | Rolling avg (10): 0.065\n", "\n", "[STATS] single_crash Summary:\n", " First 10 episodes avg: 0.180\n", " Last 10 episodes avg: 0.065\n", " Improvement: -0.115\n", "\n", "============================================================\n", "[TRAIN] Training on task: cascading_failure\n", "============================================================\n", " Episode 1/30 | Reward: +0.250 | Steps: 5 | Rolling avg (10): 0.250\n", " Episode 2/30 | Reward: -0.200 | Steps: 8 | Rolling avg (10): 0.025\n", " Episode 3/30 | Reward: +0.350 | Steps: 8 | Rolling avg (10): 0.133\n", " Episode 4/30 | Reward: -0.200 | Steps: 8 | Rolling avg (10): 0.050\n", " Episode 5/30 | Reward: +0.000 | Steps: 8 | Rolling avg (10): 0.040\n", " Episode 6/30 | Reward: +0.000 | Steps: 8 | Rolling avg (10): 0.033\n", " Episode 7/30 | Reward: +0.450 | Steps: 7 | Rolling avg (10): 0.093\n", " Episode 8/30 | Reward: +0.300 | Steps: 4 | Rolling avg (10): 0.119\n", " Episode 9/30 | Reward: -0.050 | Steps: 8 | Rolling avg (10): 0.100\n", " Episode 10/30 | Reward: +0.000 | Steps: 8 | Rolling avg (10): 0.090\n", " Episode 11/30 | Reward: +0.500 | Steps: 6 | Rolling avg (10): 0.115\n", " Episode 12/30 | Reward: +0.350 | Steps: 3 | Rolling avg (10): 0.170\n", " Episode 13/30 | Reward: +0.300 | Steps: 4 | Rolling avg (10): 0.165\n", " Episode 14/30 | Reward: -0.050 | Steps: 8 | Rolling avg (10): 0.180\n", " Episode 15/30 | Reward: +0.350 | Steps: 8 | Rolling avg (10): 0.215\n", " Episode 16/30 | Reward: +0.350 | Steps: 8 | Rolling avg (10): 0.250\n", " Episode 17/30 | Reward: +0.400 | Steps: 8 | Rolling avg (10): 0.245\n", " Episode 18/30 | Reward: -0.050 | Steps: 8 | Rolling avg (10): 0.210\n", " Episode 19/30 | Reward: -0.050 | Steps: 8 | Rolling avg (10): 0.210\n", " Episode 20/30 | Reward: +0.650 | Steps: 5 | Rolling avg (10): 0.275\n", " Episode 21/30 | Reward: +0.250 | Steps: 5 | Rolling avg (10): 0.250\n", " Episode 22/30 | Reward: -0.050 | Steps: 8 | Rolling avg (10): 0.210\n", " Episode 23/30 | Reward: +0.000 | Steps: 8 | Rolling avg (10): 0.180\n", " Episode 24/30 | Reward: +0.100 | Steps: 6 | Rolling avg (10): 0.195\n", " [CHECKPOINT] Saved cascading_failure ep25 -> ./phase2_checkpoints/cascading_failure_ep25.json\n", " Episode 25/30 | Reward: +0.250 | Steps: 5 | Rolling avg (10): 0.185\n", " Episode 26/30 | Reward: +0.200 | Steps: 7 | Rolling avg (10): 0.170\n", " Episode 27/30 | Reward: -0.050 | Steps: 8 | Rolling avg (10): 0.125\n", " Episode 28/30 | Reward: +0.300 | Steps: 4 | Rolling avg (10): 0.160\n", " Episode 29/30 | Reward: -0.050 | Steps: 8 | Rolling avg (10): 0.160\n", " Episode 30/30 | Reward: +0.100 | Steps: 6 | Rolling avg (10): 0.105\n", "\n", "[STATS] cascading_failure Summary:\n", " First 10 episodes avg: 0.090\n", " Last 10 episodes avg: 0.105\n", " Improvement: +0.015\n", "\n", "============================================================\n", "[TRAIN] Training on task: silent_degradation\n", "============================================================\n", " Episode 1/30 | Reward: +0.300 | Steps: 4 | Rolling avg (10): 0.300\n", " Episode 2/30 | Reward: +0.100 | Steps: 6 | Rolling avg (10): 0.200\n", " Episode 3/30 | Reward: -0.200 | Steps: 8 | Rolling avg (10): 0.067\n", " Episode 4/30 | Reward: +0.050 | Steps: 7 | Rolling avg (10): 0.063\n", " Episode 5/30 | Reward: +0.650 | Steps: 5 | Rolling avg (10): 0.180\n", " Episode 6/30 | Reward: -0.050 | Steps: 8 | Rolling avg (10): 0.142\n", " Episode 7/30 | Reward: +0.650 | Steps: 5 | Rolling avg (10): 0.214\n", " Episode 8/30 | Reward: +0.050 | Steps: 7 | Rolling avg (10): 0.194\n", " Episode 9/30 | Reward: +0.250 | Steps: 5 | Rolling avg (10): 0.200\n", " Episode 10/30 | Reward: +0.000 | Steps: 8 | Rolling avg (10): 0.180\n", " Episode 11/30 | Reward: +0.350 | Steps: 8 | Rolling avg (10): 0.185\n", " Episode 12/30 | Reward: +0.000 | Steps: 8 | Rolling avg (10): 0.175\n", " Episode 13/30 | Reward: +0.350 | Steps: 8 | Rolling avg (10): 0.230\n", " Episode 14/30 | Reward: -0.050 | Steps: 8 | Rolling avg (10): 0.220\n", " Episode 15/30 | Reward: +0.050 | Steps: 7 | Rolling avg (10): 0.160\n", " Episode 16/30 | Reward: +0.350 | Steps: 3 | Rolling avg (10): 0.200\n", " Episode 17/30 | Reward: -0.050 | Steps: 8 | Rolling avg (10): 0.130\n", " Episode 18/30 | Reward: +0.350 | Steps: 6 | Rolling avg (10): 0.160\n", " Episode 19/30 | Reward: +0.250 | Steps: 5 | Rolling avg (10): 0.160\n", " Episode 20/30 | Reward: +0.350 | Steps: 8 | Rolling avg (10): 0.195\n", " Episode 21/30 | Reward: +0.250 | Steps: 5 | Rolling avg (10): 0.185\n", " Episode 22/30 | Reward: +0.300 | Steps: 4 | Rolling avg (10): 0.215\n", " Episode 23/30 | Reward: -0.200 | Steps: 8 | Rolling avg (10): 0.160\n", " Episode 24/30 | Reward: +0.250 | Steps: 5 | Rolling avg (10): 0.190\n", " [CHECKPOINT] Saved silent_degradation ep25 -> ./phase2_checkpoints/silent_degradation_ep25.json\n", " Episode 25/30 | Reward: +0.250 | Steps: 5 | Rolling avg (10): 0.210\n", " Episode 26/30 | Reward: -0.050 | Steps: 8 | Rolling avg (10): 0.170\n", " Episode 27/30 | Reward: +0.250 | Steps: 5 | Rolling avg (10): 0.200\n", " Episode 28/30 | Reward: +0.000 | Steps: 8 | Rolling avg (10): 0.165\n", " Episode 29/30 | Reward: -0.050 | Steps: 8 | Rolling avg (10): 0.135\n", " Episode 30/30 | Reward: +0.100 | Steps: 7 | Rolling avg (10): 0.110\n", "\n", "[STATS] silent_degradation Summary:\n", " First 10 episodes avg: 0.180\n", " Last 10 episodes avg: 0.110\n", " Improvement: -0.070\n", "[PLOT] Reward curve saved -> reward_curve.png\n", "\n", "[GRPO] Collected 589 trajectory steps from rollout.\n", "[GRPO] Running GRPO fine-tuning on 589 trajectory steps...\n", "[GRPO] Precision: fp16 (bf16 unsupported on this GPU)\n", " 0% 0/10 [00:00 ./logtriage-trained\n", "\n", "[PUSH] Pushing to HuggingFace Hub: OGrohit/logtriage-sre-agent\n", "\n", "README.md: 100% 684/684 [00:00<00:00, 4.60MB/s]\n", "Processing Files (0 / 0) : | | 0.00B / 0.00B \n", "New Data Upload : | | 0.00B / 0.00B \u001b[A\n", "\n", " ...4fmqprp/model.safetensors: 1% 30.8M/2.68G [00:00 https://huggingface.co/OGrohit/logtriage-sre-agent\n", "\n", "============================================================\n", "[OK] TRAINING COMPLETE\n", "============================================================\n", " Reward curve: reward_curve.png\n", " Trained model: ./logtriage-trained\n", " HF Hub: https://huggingface.co/OGrohit/logtriage-sre-agent\n", "\n", " Use reward_curve.png in your demo slide.\n", " This image is 20% of your judging score.\n", "\n", " 0% 0/10 [06:19" ], "application/javascript": [ "\n", " async function download(id, filename, size) {\n", " if (!google.colab.kernel.accessAllowed) {\n", " return;\n", " }\n", " const div = document.createElement('div');\n", " const label = document.createElement('label');\n", " label.textContent = `Downloading \"${filename}\": `;\n", " div.appendChild(label);\n", " const progress = document.createElement('progress');\n", " progress.max = size;\n", " div.appendChild(progress);\n", " document.body.appendChild(div);\n", "\n", " const buffers = [];\n", " let downloaded = 0;\n", "\n", " const channel = await google.colab.kernel.comms.open(id);\n", " // Send a message to notify the kernel that we're ready.\n", " channel.send({})\n", "\n", " for await (const message of channel.messages) {\n", " // Send a message to notify the kernel that we're ready.\n", " channel.send({})\n", " if (message.buffers) {\n", " for (const buffer of message.buffers) {\n", " buffers.push(buffer);\n", " downloaded += buffer.byteLength;\n", " progress.value = downloaded;\n", " }\n", " }\n", " }\n", " const blob = new Blob(buffers, {type: 'application/binary'});\n", " const a = document.createElement('a');\n", " a.href = window.URL.createObjectURL(blob);\n", " a.download = filename;\n", " div.appendChild(a);\n", " a.click();\n", " div.remove();\n", " }\n", " " ] }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": [ "" ], "application/javascript": [ "download(\"download_1efc715d-f3eb-4702-86d0-e541c09e6c15\", \"reward_curve.png\", 268703)" ] }, "metadata": {} } ] } ] }