{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "gpuType": "T4"
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "accelerator": "GPU",
    "widgets": {
      "application/vnd.jupyter.widget-state+json": {
        "efa20c94265a4e8aa5c2b42b30d8a0dc": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "VBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "VBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "VBoxView",
            "box_style": "",
            "children": [],
            "layout": "IPY_MODEL_07e8ea9a765b4bb38aa9486c08c7e876"
          }
        },
        "c75a89d0eeb04c8c9563e536cf3617de": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_1254edd893ea4527840a08e998a42e66",
            "placeholder": "​",
            "style": "IPY_MODEL_e0dd533ff9d14edda59583096c3ed44c",
            "value": "<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.svg\nalt='Hugging Face'> <br> Copy a token from <a\nhref=\"https://huggingface.co/settings/tokens\" target=\"_blank\">your Hugging Face\ntokens page</a> and paste it below. <br> Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file. </center>"
          }
        },
        "823e59efcb874d0f875829bdb729f5f1": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "PasswordModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "PasswordModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "PasswordView",
            "continuous_update": true,
            "description": "Token:",
            "description_tooltip": null,
            "disabled": false,
            "layout": "IPY_MODEL_37a76ab6f9fe46c68598ecde37caa9ff",
            "placeholder": "​",
            "style": "IPY_MODEL_62ae40b3ca12465485ca5469798b565b",
            "value": ""
          }
        },
        "2df40cb11c5d41d6856d53cdbd57357b": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "CheckboxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "CheckboxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "CheckboxView",
            "description": "Add token as git credential?",
            "description_tooltip": null,
            "disabled": false,
            "indent": true,
            "layout": "IPY_MODEL_06e8ddba2c9a47e98ac550ea4e35d992",
            "style": "IPY_MODEL_972ce993ae0943e58e2acbd56bf0de7b",
            "value": true
          }
        },
        "f06edce066f94a53a8b7111197c9d1ea": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ButtonModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ButtonModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ButtonView",
            "button_style": "",
            "description": "Login",
            "disabled": false,
            "icon": "",
            "layout": "IPY_MODEL_b4ddb223ea4d4046908f06cf8f8c72ee",
            "style": "IPY_MODEL_210cdf5848f143fca9b77855e24de566",
            "tooltip": ""
          }
        },
        "4d1b5e16c7484bb2b38691acc1fd372a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_9619fd586f35410a9b3a18c8c44098dd",
            "placeholder": "​",
            "style": "IPY_MODEL_e06302bb607344ba8e1e931f5c55f1a4",
            "value": "\n<b>Pro Tip:</b> If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. </center>"
          }
        },
        "07e8ea9a765b4bb38aa9486c08c7e876": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": "center",
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": "flex",
            "flex": null,
            "flex_flow": "column",
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": "50%"
          }
        },
        "1254edd893ea4527840a08e998a42e66": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "e0dd533ff9d14edda59583096c3ed44c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "37a76ab6f9fe46c68598ecde37caa9ff": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "62ae40b3ca12465485ca5469798b565b": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "06e8ddba2c9a47e98ac550ea4e35d992": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "972ce993ae0943e58e2acbd56bf0de7b": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "b4ddb223ea4d4046908f06cf8f8c72ee": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "210cdf5848f143fca9b77855e24de566": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ButtonStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ButtonStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "button_color": null,
            "font_weight": ""
          }
        },
        "9619fd586f35410a9b3a18c8c44098dd": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "e06302bb607344ba8e1e931f5c55f1a4": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "694961c954f64e96979f260fc8072430": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "LabelModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "LabelModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "LabelView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_9b5cd082ede1494e9476b29935308eb2",
            "placeholder": "​",
            "style": "IPY_MODEL_14303e52186d40c38cb936202917dbec",
            "value": "Connecting..."
          }
        },
        "9b5cd082ede1494e9476b29935308eb2": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "14303e52186d40c38cb936202917dbec": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        }
      }
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "PhHOWGEzx02F",
        "outputId": "218d1fde-080b-455e-dc9a-3232320b6847"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Sun Apr 26 01:11:23 2026       \n",
            "+-----------------------------------------------------------------------------------------+\n",
            "| NVIDIA-SMI 580.82.07              Driver Version: 580.82.07      CUDA Version: 13.0     |\n",
            "+-----------------------------------------+------------------------+----------------------+\n",
            "| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |\n",
            "| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |\n",
            "|                                         |                        |               MIG M. |\n",
            "|=========================================+========================+======================|\n",
            "|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |\n",
            "| N/A   43C    P8              9W /   70W |       0MiB /  15360MiB |      0%      Default |\n",
            "|                                         |                        |                  N/A |\n",
            "+-----------------------------------------+------------------------+----------------------+\n",
            "\n",
            "+-----------------------------------------------------------------------------------------+\n",
            "| Processes:                                                                              |\n",
            "|  GPU   GI   CI              PID   Type   Process name                        GPU Memory |\n",
            "|        ID   ID                                                               Usage      |\n",
            "|=========================================================================================|\n",
            "|  No running processes found                                                             |\n",
            "+-----------------------------------------------------------------------------------------+\n"
          ]
        }
      ],
      "source": [
        "!nvidia-smi"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import torch\n",
        "\n",
        "# Sanity-check that the runtime exposes a CUDA device before training starts.\n",
        "print(f\"CUDA available: {torch.cuda.is_available()}\")\n",
        "if torch.cuda.is_available():\n",
        "    print(f\"GPU: {torch.cuda.get_device_name(0)}\")\n",
        "    props = torch.cuda.get_device_properties(0)\n",
        "    # total_memory is reported in bytes; dividing by 1e9 gives decimal gigabytes.\n",
        "    print(f\"VRAM: {props.total_memory / 1e9:.1f} GB\")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "fHa9jIEV1NhP",
        "outputId": "b7c9160f-f2ac-431d-f64f-f451cd1153aa"
      },
      "execution_count": 2,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "CUDA available: True\n",
            "GPU: Tesla T4\n",
            "VRAM: 15.6 GB\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 3,
      "metadata": {
        "id": "Gi8sLt91Y_PE",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "2b1c9027-ee85-4dd7-892c-79afc490e5b5"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "\u001b[?25l   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/1.8 MB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.8/1.8 MB\u001b[0m \u001b[31m85.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h"
          ]
        }
      ],
      "source": [
        "# Use %pip (not !pip) so packages are installed into the environment of the running kernel.\n",
        "%pip install -q -U pip\n",
        "# NOTE(review): the training log in this notebook reports torch 2.10.0+cu128, so this cu121\n",
        "# index does not appear to replace the preinstalled torch - confirm whether it is still needed.\n",
        "%pip install -q torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121\n",
        "# NOTE(review): versions are unpinned, so a re-run may resolve different releases - consider pinning.\n",
        "%pip install -q bitsandbytes peft trl transformers datasets accelerate matplotlib requests huggingface_hub unsloth"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "from huggingface_hub import login\n",
        "\n",
        "# Interactive prompt: paste your Hugging Face token when asked (nothing is hard-coded here).\n",
        "login()"
      ],
      "metadata": {
        "id": "tL1kUtkE3aB1",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 17,
          "referenced_widgets": [
            "efa20c94265a4e8aa5c2b42b30d8a0dc",
            "c75a89d0eeb04c8c9563e536cf3617de",
            "823e59efcb874d0f875829bdb729f5f1",
            "2df40cb11c5d41d6856d53cdbd57357b",
            "f06edce066f94a53a8b7111197c9d1ea",
            "4d1b5e16c7484bb2b38691acc1fd372a",
            "07e8ea9a765b4bb38aa9486c08c7e876",
            "1254edd893ea4527840a08e998a42e66",
            "e0dd533ff9d14edda59583096c3ed44c",
            "37a76ab6f9fe46c68598ecde37caa9ff",
            "62ae40b3ca12465485ca5469798b565b",
            "06e8ddba2c9a47e98ac550ea4e35d992",
            "972ce993ae0943e58e2acbd56bf0de7b",
            "b4ddb223ea4d4046908f06cf8f8c72ee",
            "210cdf5848f143fca9b77855e24de566",
            "9619fd586f35410a9b3a18c8c44098dd",
            "e06302bb607344ba8e1e931f5c55f1a4",
            "694961c954f64e96979f260fc8072430",
            "9b5cd082ede1494e9476b29935308eb2",
            "14303e52186d40c38cb936202917dbec"
          ]
        },
        "outputId": "f936cb56-604f-4013-8612-5d649afb97e2"
      },
      "execution_count": 4,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "efa20c94265a4e8aa5c2b42b30d8a0dc"
            }
          },
          "metadata": {}
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "!python train.py \\\n",
        "    --model Qwen/Qwen2.5-3B-Instruct \\\n",
        "    --task all \\\n",
        "    --episodes 30 \\\n",
        "    --load_in_4bit \\\n",
        "    --grpo_max_steps 10 \\\n",
        "    --env_url https://ogrohit-logtriage-env.hf.space \\\n",
        "    --push_to_hub \\\n",
        "    --hub_model_id OGrohit/logtriage-sre-agent"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "psC2BtB6HXFm",
        "outputId": "85a43f8f-f0f3-470f-be0f-d050d94e3425"
      },
      "execution_count": 5,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "2026-04-26 01:14:42.333349: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
            "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
            "E0000 00:00:1777166082.355494   10026 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
            "E0000 00:00:1777166082.362449   10026 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
            "W0000 00:00:1777166082.381114   10026 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
            "W0000 00:00:1777166082.381164   10026 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
            "W0000 00:00:1777166082.381169   10026 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
            "W0000 00:00:1777166082.381173   10026 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
            "2026-04-26 01:14:42.385910: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
            "To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
            "Skipping import of cpp extensions due to incompatible torch version. Please upgrade to torch >= 2.11.0 (found 2.10.0+cu128).\n",
            "/content/train.py:45: UserWarning: WARNING: Unsloth should be imported before trl, transformers, peft to ensure all optimizations are applied. Your code may run slower or encounter memory issues without these optimizations.\n",
            "\n",
            "Please restructure your imports with 'import unsloth' at the top of your file.\n",
            "  from unsloth import FastLanguageModel\n",
            "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
            "🦥 Unsloth Zoo will now patch everything to make training faster!\n",
            "Unable to import `torchao` Tensor objects. This may affect loading checkpoints serialized with `torchao`\n",
            "Flax classes are deprecated and will be removed in Diffusers v1.0.0. We recommend migrating to PyTorch classes or pinning your version of Diffusers.\n",
            "Flax classes are deprecated and will be removed in Diffusers v1.0.0. We recommend migrating to PyTorch classes or pinning your version of Diffusers.\n",
            "\n",
            "[LOGGING] LogTriageEnv GRPO Training\n",
            "   Model:   Qwen/Qwen2.5-3B-Instruct\n",
            "   Task:    all\n",
            "   Episodes: 30\n",
            "   Device:  cuda\n",
            "   Env URL: https://ogrohit-logtriage-env.hf.space\n",
            "\n",
            "[OK] Connected to LogTriageEnv at https://ogrohit-logtriage-env.hf.space\n",
            "[MODEL] Loading model: Qwen/Qwen2.5-3B-Instruct\n",
            "[QLoRA] Loading model with BitsAndBytes 4-bit...\n",
            "tokenizer_config.json: 7.30kB [00:00, 4.55MB/s]\n",
            "vocab.json: 2.78MB [00:00, 104MB/s]\n",
            "merges.txt: 1.67MB [00:00, 111MB/s]\n",
            "tokenizer.json: 7.03MB [00:00, 139MB/s]\n",
            "[OK] 4-bit BitsAndBytesConfig applied\n",
            "config.json: 100% 661/661 [00:00<00:00, 5.12MB/s]\n",
            "model.safetensors.index.json: 35.6kB [00:00, 101MB/s]\n",
            "Fetching 2 files:   0% 0/2 [00:00<?, ?it/s]\n",
            "model-00001-of-00002.safetensors:   0% 0.00/3.97G [00:00<?, ?B/s]\u001b[A\n",
            "\n",
            "model-00002-of-00002.safetensors:   0% 0.00/2.20G [00:00<?, ?B/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00002-of-00002.safetensors:   0% 0.00/2.20G [00:00<?, ?B/s]\u001b[A\u001b[A\n",
            "model-00001-of-00002.safetensors:   0% 0.00/3.97G [00:00<?, ?B/s]\u001b[A\n",
            "model-00001-of-00002.safetensors:   0% 0.00/3.97G [00:00<?, ?B/s]\u001b[A\n",
            "\n",
            "model-00002-of-00002.safetensors:   0% 0.00/2.20G [00:00<?, ?B/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00002-of-00002.safetensors:   0% 0.00/2.20G [00:00<?, ?B/s]\u001b[A\u001b[A\n",
            "model-00001-of-00002.safetensors:   0% 0.00/3.97G [00:00<?, ?B/s]\u001b[A\n",
            "model-00001-of-00002.safetensors:   0% 0.00/3.97G [00:00<?, ?B/s]\u001b[A\n",
            "\n",
            "model-00002-of-00002.safetensors:   0% 0.00/2.20G [00:00<?, ?B/s]\u001b[A\u001b[A\n",
            "model-00001-of-00002.safetensors:   0% 0.00/3.97G [00:01<?, ?B/s]\u001b[A\n",
            "\n",
            "model-00002-of-00002.safetensors:   0% 0.00/2.20G [00:01<?, ?B/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00002-of-00002.safetensors:   0% 0.00/2.20G [00:01<?, ?B/s]\u001b[A\u001b[A\n",
            "model-00001-of-00002.safetensors:   0% 0.00/3.97G [00:01<?, ?B/s]\u001b[A\n",
            "\n",
            "model-00002-of-00002.safetensors:   0% 0.00/2.20G [00:01<?, ?B/s]\u001b[A\u001b[A\n",
            "model-00001-of-00002.safetensors:   0% 0.00/3.97G [00:01<?, ?B/s]\u001b[A\n",
            "model-00001-of-00002.safetensors:   0% 20.3k/3.97G [00:01<10:54:45, 101kB/s]\u001b[A\n",
            "\n",
            "model-00002-of-00002.safetensors:   0% 0.00/2.20G [00:01<?, ?B/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00002-of-00002.safetensors:   0% 0.00/2.20G [00:01<?, ?B/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00002-of-00002.safetensors:   0% 0.00/2.20G [00:02<?, ?B/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00002-of-00002.safetensors:   0% 0.00/2.20G [00:02<?, ?B/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00002-of-00002.safetensors:   0% 0.00/2.20G [00:09<?, ?B/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00002-of-00002.safetensors:   4% 90.8M/2.20G [00:15<02:10, 16.2MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00002-of-00002.safetensors:  10% 225M/2.20G [00:15<00:40, 48.7MB/s] \u001b[A\u001b[A\n",
            "model-00001-of-00002.safetensors:   0% 20.3k/3.97G [00:15<10:54:45, 101kB/s]\u001b[A\n",
            "model-00001-of-00002.safetensors:   8% 335M/3.97G [00:21<03:33, 17.0MB/s]   \u001b[A\n",
            "\n",
            "model-00002-of-00002.safetensors:  10% 225M/2.20G [00:25<00:40, 48.7MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00002-of-00002.safetensors:  25% 560M/2.20G [00:30<01:02, 26.3MB/s]\u001b[A\u001b[A\n",
            "model-00001-of-00002.safetensors:   8% 335M/3.97G [00:35<03:33, 17.0MB/s]\u001b[A\n",
            "\n",
            "model-00002-of-00002.safetensors:  25% 560M/2.20G [00:45<01:02, 26.3MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00002-of-00002.safetensors:  44% 961M/2.20G [00:50<00:56, 22.2MB/s]\u001b[A\u001b[A\n",
            "model-00001-of-00002.safetensors:  23% 921M/3.97G [00:50<02:41, 18.9MB/s]\u001b[A\n",
            "\n",
            "model-00002-of-00002.safetensors:  58% 1.28G/2.20G [01:02<00:38, 24.2MB/s]\u001b[A\u001b[A\n",
            "model-00001-of-00002.safetensors:  31% 1.24G/3.97G [01:04<02:15, 20.1MB/s]\u001b[A\n",
            "\n",
            "model-00002-of-00002.safetensors:  72% 1.59G/2.20G [01:15<00:25, 23.8MB/s]\u001b[A\u001b[A\n",
            "model-00001-of-00002.safetensors:  31% 1.24G/3.97G [01:15<02:15, 20.1MB/s]\u001b[A\n",
            "model-00001-of-00002.safetensors:  41% 1.65G/3.97G [01:19<01:42, 22.6MB/s]\u001b[A\n",
            "\n",
            "model-00002-of-00002.safetensors:  72% 1.59G/2.20G [01:25<00:25, 23.8MB/s]\u001b[A\u001b[A\n",
            "model-00001-of-00002.safetensors:  50% 2.00G/3.97G [01:26<01:11, 27.6MB/s]\u001b[A\n",
            "\n",
            "model-00002-of-00002.safetensors:  92% 2.03G/2.20G [01:26<00:06, 28.7MB/s]\u001b[A\u001b[A\n",
            "\n",
            "model-00002-of-00002.safetensors: 100% 2.20G/2.20G [01:37<00:00, 22.6MB/s]\n",
            "\n",
            "model-00001-of-00002.safetensors:  58% 2.32G/3.97G [01:38<01:00, 27.4MB/s]\u001b[A\n",
            "model-00001-of-00002.safetensors:  68% 2.72G/3.97G [01:44<00:36, 34.7MB/s]\u001b[A\n",
            "model-00001-of-00002.safetensors:  76% 3.03G/3.97G [01:48<00:23, 40.0MB/s]\u001b[A\n",
            "model-00001-of-00002.safetensors:  84% 3.35G/3.97G [01:50<00:11, 52.9MB/s]\u001b[A\n",
            "model-00001-of-00002.safetensors:  94% 3.72G/3.97G [01:55<00:04, 58.5MB/s]\u001b[A\n",
            "model-00001-of-00002.safetensors: 100% 3.97G/3.97G [01:56<00:00, 34.2MB/s]\n",
            "Fetching 2 files: 100% 2/2 [01:56<00:00, 58.32s/it] \n",
            "Loading checkpoint shards: 100% 2/2 [00:26<00:00, 13.17s/it]\n",
            "generation_config.json: 100% 242/242 [00:00<00:00, 1.63MB/s]\n",
            "[OK] Model loaded in 4-bit quantized mode\n",
            "[QLoRA] Applying LoRA adapter...\n",
            "trainable params: 29,933,568 || all params: 3,115,872,256 || trainable%: 0.9607\n",
            "[OK] LoRA adapter attached (r=16, alpha=32)\n",
            "[OK] Model loaded\n",
            "\n",
            "\n",
            "============================================================\n",
            "[TRAIN] Training on task: single_crash\n",
            "============================================================\n",
            "  Episode   1/30 | Reward: +0.350 | Steps:  8 | Rolling avg (10): 0.350\n",
            "  Episode   2/30 | Reward: -0.050 | Steps:  8 | Rolling avg (10): 0.150\n",
            "  Episode   3/30 | Reward: +0.250 | Steps:  5 | Rolling avg (10): 0.183\n",
            "  Episode   4/30 | Reward: +0.000 | Steps:  8 | Rolling avg (10): 0.137\n",
            "  Episode   5/30 | Reward: -0.050 | Steps:  8 | Rolling avg (10): 0.100\n",
            "  Episode   6/30 | Reward: +0.350 | Steps:  3 | Rolling avg (10): 0.142\n",
            "  Episode   7/30 | Reward: -0.050 | Steps:  8 | Rolling avg (10): 0.114\n",
            "  Episode   8/30 | Reward: +0.250 | Steps:  5 | Rolling avg (10): 0.131\n",
            "  Episode   9/30 | Reward: +0.500 | Steps: 14 | Rolling avg (10): 0.172\n",
            "  Episode  10/30 | Reward: +0.250 | Steps:  3 | Rolling avg (10): 0.180\n",
            "  Episode  11/30 | Reward: +0.600 | Steps:  3 | Rolling avg (10): 0.205\n",
            "  Episode  12/30 | Reward: +0.400 | Steps:  7 | Rolling avg (10): 0.250\n",
            "  Episode  13/30 | Reward: +0.250 | Steps:  3 | Rolling avg (10): 0.250\n",
            "  Episode  14/30 | Reward: +0.150 | Steps:  4 | Rolling avg (10): 0.265\n",
            "  Episode  15/30 | Reward: +0.350 | Steps:  8 | Rolling avg (10): 0.305\n",
            "  Episode  16/30 | Reward: -0.050 | Steps:  8 | Rolling avg (10): 0.265\n",
            "  Episode  17/30 | Reward: +0.650 | Steps:  5 | Rolling avg (10): 0.335\n",
            "  Episode  18/30 | Reward: +0.350 | Steps:  8 | Rolling avg (10): 0.345\n",
            "  Episode  19/30 | Reward: -0.050 | Steps:  8 | Rolling avg (10): 0.290\n",
            "  Episode  20/30 | Reward: +0.150 | Steps:  4 | Rolling avg (10): 0.280\n",
            "  Episode  21/30 | Reward: +0.250 | Steps:  5 | Rolling avg (10): 0.245\n",
            "  Episode  22/30 | Reward: -0.050 | Steps:  8 | Rolling avg (10): 0.200\n",
            "  Episode  23/30 | Reward: -0.050 | Steps:  8 | Rolling avg (10): 0.170\n",
            "  Episode  24/30 | Reward: +0.100 | Steps:  6 | Rolling avg (10): 0.165\n",
            "  [CHECKPOINT] Saved single_crash ep25 -> ./phase2_checkpoints/single_crash_ep25.json\n",
            "  Episode  25/30 | Reward: +0.150 | Steps:  4 | Rolling avg (10): 0.145\n",
            "  Episode  26/30 | Reward: +0.100 | Steps:  6 | Rolling avg (10): 0.160\n",
            "  Episode  27/30 | Reward: -0.050 | Steps:  6 | Rolling avg (10): 0.090\n",
            "  Episode  28/30 | Reward: +0.000 | Steps:  8 | Rolling avg (10): 0.055\n",
            "  Episode  29/30 | Reward: +0.100 | Steps:  6 | Rolling avg (10): 0.070\n",
            "  Episode  30/30 | Reward: +0.100 | Steps:  6 | Rolling avg (10): 0.065\n",
            "\n",
            "[STATS] single_crash Summary:\n",
            "     First 10 episodes avg: 0.180\n",
            "     Last  10 episodes avg: 0.065\n",
            "     Improvement:           -0.115\n",
            "\n",
            "============================================================\n",
            "[TRAIN] Training on task: cascading_failure\n",
            "============================================================\n",
            "  Episode   1/30 | Reward: +0.250 | Steps:  5 | Rolling avg (10): 0.250\n",
            "  Episode   2/30 | Reward: -0.200 | Steps:  8 | Rolling avg (10): 0.025\n",
            "  Episode   3/30 | Reward: +0.350 | Steps:  8 | Rolling avg (10): 0.133\n",
            "  Episode   4/30 | Reward: -0.200 | Steps:  8 | Rolling avg (10): 0.050\n",
            "  Episode   5/30 | Reward: +0.000 | Steps:  8 | Rolling avg (10): 0.040\n",
            "  Episode   6/30 | Reward: +0.000 | Steps:  8 | Rolling avg (10): 0.033\n",
            "  Episode   7/30 | Reward: +0.450 | Steps:  7 | Rolling avg (10): 0.093\n",
            "  Episode   8/30 | Reward: +0.300 | Steps:  4 | Rolling avg (10): 0.119\n",
            "  Episode   9/30 | Reward: -0.050 | Steps:  8 | Rolling avg (10): 0.100\n",
            "  Episode  10/30 | Reward: +0.000 | Steps:  8 | Rolling avg (10): 0.090\n",
            "  Episode  11/30 | Reward: +0.500 | Steps:  6 | Rolling avg (10): 0.115\n",
            "  Episode  12/30 | Reward: +0.350 | Steps:  3 | Rolling avg (10): 0.170\n",
            "  Episode  13/30 | Reward: +0.300 | Steps:  4 | Rolling avg (10): 0.165\n",
            "  Episode  14/30 | Reward: -0.050 | Steps:  8 | Rolling avg (10): 0.180\n",
            "  Episode  15/30 | Reward: +0.350 | Steps:  8 | Rolling avg (10): 0.215\n",
            "  Episode  16/30 | Reward: +0.350 | Steps:  8 | Rolling avg (10): 0.250\n",
            "  Episode  17/30 | Reward: +0.400 | Steps:  8 | Rolling avg (10): 0.245\n",
            "  Episode  18/30 | Reward: -0.050 | Steps:  8 | Rolling avg (10): 0.210\n",
            "  Episode  19/30 | Reward: -0.050 | Steps:  8 | Rolling avg (10): 0.210\n",
            "  Episode  20/30 | Reward: +0.650 | Steps:  5 | Rolling avg (10): 0.275\n",
            "  Episode  21/30 | Reward: +0.250 | Steps:  5 | Rolling avg (10): 0.250\n",
            "  Episode  22/30 | Reward: -0.050 | Steps:  8 | Rolling avg (10): 0.210\n",
            "  Episode  23/30 | Reward: +0.000 | Steps:  8 | Rolling avg (10): 0.180\n",
            "  Episode  24/30 | Reward: +0.100 | Steps:  6 | Rolling avg (10): 0.195\n",
            "  [CHECKPOINT] Saved cascading_failure ep25 -> ./phase2_checkpoints/cascading_failure_ep25.json\n",
            "  Episode  25/30 | Reward: +0.250 | Steps:  5 | Rolling avg (10): 0.185\n",
            "  Episode  26/30 | Reward: +0.200 | Steps:  7 | Rolling avg (10): 0.170\n",
            "  Episode  27/30 | Reward: -0.050 | Steps:  8 | Rolling avg (10): 0.125\n",
            "  Episode  28/30 | Reward: +0.300 | Steps:  4 | Rolling avg (10): 0.160\n",
            "  Episode  29/30 | Reward: -0.050 | Steps:  8 | Rolling avg (10): 0.160\n",
            "  Episode  30/30 | Reward: +0.100 | Steps:  6 | Rolling avg (10): 0.105\n",
            "\n",
            "[STATS] cascading_failure Summary:\n",
            "     First 10 episodes avg: 0.090\n",
            "     Last  10 episodes avg: 0.105\n",
            "     Improvement:           +0.015\n",
            "\n",
            "============================================================\n",
            "[TRAIN] Training on task: silent_degradation\n",
            "============================================================\n",
            "  Episode   1/30 | Reward: +0.300 | Steps:  4 | Rolling avg (10): 0.300\n",
            "  Episode   2/30 | Reward: +0.100 | Steps:  6 | Rolling avg (10): 0.200\n",
            "  Episode   3/30 | Reward: -0.200 | Steps:  8 | Rolling avg (10): 0.067\n",
            "  Episode   4/30 | Reward: +0.050 | Steps:  7 | Rolling avg (10): 0.063\n",
            "  Episode   5/30 | Reward: +0.650 | Steps:  5 | Rolling avg (10): 0.180\n",
            "  Episode   6/30 | Reward: -0.050 | Steps:  8 | Rolling avg (10): 0.142\n",
            "  Episode   7/30 | Reward: +0.650 | Steps:  5 | Rolling avg (10): 0.214\n",
            "  Episode   8/30 | Reward: +0.050 | Steps:  7 | Rolling avg (10): 0.194\n",
            "  Episode   9/30 | Reward: +0.250 | Steps:  5 | Rolling avg (10): 0.200\n",
            "  Episode  10/30 | Reward: +0.000 | Steps:  8 | Rolling avg (10): 0.180\n",
            "  Episode  11/30 | Reward: +0.350 | Steps:  8 | Rolling avg (10): 0.185\n",
            "  Episode  12/30 | Reward: +0.000 | Steps:  8 | Rolling avg (10): 0.175\n",
            "  Episode  13/30 | Reward: +0.350 | Steps:  8 | Rolling avg (10): 0.230\n",
            "  Episode  14/30 | Reward: -0.050 | Steps:  8 | Rolling avg (10): 0.220\n",
            "  Episode  15/30 | Reward: +0.050 | Steps:  7 | Rolling avg (10): 0.160\n",
            "  Episode  16/30 | Reward: +0.350 | Steps:  3 | Rolling avg (10): 0.200\n",
            "  Episode  17/30 | Reward: -0.050 | Steps:  8 | Rolling avg (10): 0.130\n",
            "  Episode  18/30 | Reward: +0.350 | Steps:  6 | Rolling avg (10): 0.160\n",
            "  Episode  19/30 | Reward: +0.250 | Steps:  5 | Rolling avg (10): 0.160\n",
            "  Episode  20/30 | Reward: +0.350 | Steps:  8 | Rolling avg (10): 0.195\n",
            "  Episode  21/30 | Reward: +0.250 | Steps:  5 | Rolling avg (10): 0.185\n",
            "  Episode  22/30 | Reward: +0.300 | Steps:  4 | Rolling avg (10): 0.215\n",
            "  Episode  23/30 | Reward: -0.200 | Steps:  8 | Rolling avg (10): 0.160\n",
            "  Episode  24/30 | Reward: +0.250 | Steps:  5 | Rolling avg (10): 0.190\n",
            "  [CHECKPOINT] Saved silent_degradation ep25 -> ./phase2_checkpoints/silent_degradation_ep25.json\n",
            "  Episode  25/30 | Reward: +0.250 | Steps:  5 | Rolling avg (10): 0.210\n",
            "  Episode  26/30 | Reward: -0.050 | Steps:  8 | Rolling avg (10): 0.170\n",
            "  Episode  27/30 | Reward: +0.250 | Steps:  5 | Rolling avg (10): 0.200\n",
            "  Episode  28/30 | Reward: +0.000 | Steps:  8 | Rolling avg (10): 0.165\n",
            "  Episode  29/30 | Reward: -0.050 | Steps:  8 | Rolling avg (10): 0.135\n",
            "  Episode  30/30 | Reward: +0.100 | Steps:  7 | Rolling avg (10): 0.110\n",
            "\n",
            "[STATS] silent_degradation Summary:\n",
            "     First 10 episodes avg: 0.180\n",
            "     Last  10 episodes avg: 0.110\n",
            "     Improvement:           -0.070\n",
            "[PLOT] Reward curve saved -> reward_curve.png\n",
            "\n",
            "[GRPO] Collected 589 trajectory steps from rollout.\n",
            "[GRPO] Running GRPO fine-tuning on 589 trajectory steps...\n",
            "[GRPO] Precision: fp16 (bf16 unsupported on this GPU)\n",
            "  0% 0/10 [00:00<?, ?it/s][WARN] GRPO trainer error: No inf checks were recorded prior to update.\n",
            "[WARN] Continuing with rollout-only results.\n",
            "[SAVE] Merging LoRA adapter into base weights...\n",
            "/usr/local/lib/python3.12/dist-packages/peft/tuners/lora/bnb.py:397: UserWarning: Merge lora module to 4-bit linear may get different generations due to rounding errors.\n",
            "  warnings.warn(\n",
            "[OK] LoRA merged — saving full model\n",
            "\n",
            "[SAVE] Model saved -> ./logtriage-trained\n",
            "\n",
            "[PUSH] Pushing to HuggingFace Hub: OGrohit/logtriage-sre-agent\n",
            "\n",
            "README.md: 100% 684/684 [00:00<00:00, 4.60MB/s]\n",
            "Processing Files (0 / 0)      : |          |  0.00B /  0.00B            \n",
            "New Data Upload               : |          |  0.00B /  0.00B            \u001b[A\n",
            "\n",
            "  ...4fmqprp/model.safetensors:   1% 30.8M/2.68G [00:00<?, ?B/s]\u001b[A\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :   1% 30.8M/2.68G [00:02<04:13, 10.4MB/s, 11.8MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :   2% 53.5M/2.68G [00:03<02:14, 19.5MB/s, 19.1MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :   3% 84.5M/2.68G [00:03<01:14, 34.7MB/s, 28.2MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :   4% 115M/2.68G [00:03<00:50, 51.2MB/s, 36.1MB/s  ] \n",
            "\n",
            "Processing Files (0 / 1)      :   5% 146M/2.68G [00:03<00:37, 68.1MB/s, 43.0MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :   7% 178M/2.68G [00:03<00:29, 85.2MB/s, 49.4MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :   8% 209M/2.68G [00:04<00:24, 99.8MB/s, 54.9MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :   9% 240M/2.68G [00:04<00:21, 113MB/s, 59.9MB/s  ] \n",
            "\n",
            "Processing Files (0 / 1)      :  10% 271M/2.68G [00:04<00:19, 123MB/s, 64.4MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :  11% 294M/2.68G [00:04<00:19, 121MB/s, 66.7MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :  12% 333M/2.68G [00:04<00:16, 141MB/s, 72.3MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :  13% 356M/2.68G [00:05<00:17, 135MB/s, 74.2MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :  14% 387M/2.68G [00:05<00:16, 141MB/s, 77.5MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :  16% 419M/2.68G [00:05<00:15, 145MB/s, 80.5MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :  17% 449M/2.68G [00:05<00:15, 148MB/s, 83.2MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :  18% 481M/2.68G [00:05<00:14, 150MB/s, 85.8MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :  19% 512M/2.68G [00:06<00:14, 152MB/s, 88.2MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :  20% 543M/2.68G [00:06<00:13, 153MB/s, 90.4MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :  21% 574M/2.68G [00:06<00:13, 154MB/s, 92.5MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :  23% 605M/2.68G [00:06<00:13, 154MB/s, 94.5MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :  24% 636M/2.68G [00:06<00:13, 155MB/s, 96.4MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :  25% 668M/2.68G [00:07<00:12, 156MB/s, 98.2MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :  26% 699M/2.68G [00:07<00:12, 156MB/s, 99.9MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :  27% 730M/2.68G [00:07<00:12, 155MB/s,  101MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :  28% 761M/2.68G [00:07<00:12, 155MB/s,  103MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :  30% 791M/2.68G [00:07<00:12, 154MB/s,  104MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :  31% 822M/2.68G [00:08<00:12, 154MB/s,  105MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :  32% 846M/2.68G [00:08<00:12, 143MB/s,  106MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :  32% 869M/2.68G [00:08<00:13, 134MB/s,  106MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :  33% 884M/2.68G [00:08<00:15, 118MB/s,  105MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :  34% 907M/2.68G [00:08<00:15, 117MB/s,  105MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :  34% 923M/2.68G [00:09<00:16, 105MB/s,  105MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :  35% 946M/2.68G [00:09<00:15, 109MB/s,  105MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :  36% 962M/2.68G [00:09<00:17, 99.0MB/s,  105MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :  37% 985M/2.68G [00:09<00:16, 104MB/s,  105MB/s  ] \n",
            "\n",
            "Processing Files (0 / 1)      :  37% 1.00G/2.68G [00:09<00:17, 96.5MB/s,  104MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :  38% 1.02G/2.68G [00:10<00:18, 91.1MB/s,  104MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :  39% 1.03G/2.68G [00:10<00:19, 86.5MB/s,  103MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :  39% 1.06G/2.68G [00:10<00:16, 96.6MB/s,  103MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :  40% 1.07G/2.68G [00:10<00:17, 90.3MB/s,  105MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :  41% 1.09G/2.68G [00:10<00:15, 99.4MB/s,  107MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :  41% 1.11G/2.68G [00:11<00:16, 92.7MB/s,  109MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :  42% 1.13G/2.68G [00:11<00:15, 100MB/s,  111MB/s  ] \n",
            "\n",
            "Processing Files (0 / 1)      :  43% 1.16G/2.68G [00:11<00:14, 105MB/s,  113MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :  44% 1.17G/2.68G [00:11<00:15, 97.6MB/s,  115MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :  45% 1.20G/2.68G [00:11<00:12, 114MB/s,  118MB/s  ] \n",
            "\n",
            "Processing Files (0 / 1)      :  46% 1.23G/2.68G [00:12<00:12, 119MB/s,  121MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :  47% 1.25G/2.68G [00:12<00:11, 119MB/s,  123MB/s  ]\n",
            "\n",
            "  ...4fmqprp/model.safetensors:  47% 1.25G/2.68G [00:09<00:11, 127MB/s]\u001b[A\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  47% 1.27G/2.68G [00:12<00:18, 77.4MB/s,  124MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :  48% 1.29G/2.68G [00:12<00:14, 92.7MB/s,  127MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :  49% 1.31G/2.68G [00:13<00:15, 88.3MB/s,  125MB/s  ]\n",
            "New Data Upload               :   1% 793k/67.0M [00:13<18:19, 60.3kB/s, 77.7kB/s  ]\u001b[A\n",
            "\n",
            "  ...4fmqprp/model.safetensors:  49% 1.31G/2.68G [00:10<00:11, 123MB/s]\u001b[A\u001b[A\n",
            "\n",
            "  ...4fmqprp/model.safetensors:  49% 1.31G/2.68G [00:10<00:11, 120MB/s]\u001b[A\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  49% 1.31G/2.68G [00:13<00:32, 42.2MB/s,  117MB/s  ]\n",
            "New Data Upload               :   2% 1.59M/67.0M [00:13<07:56, 137kB/s,  155kB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  49% 1.31G/2.68G [00:13<00:39, 34.6MB/s,  114MB/s  ]\n",
            "New Data Upload               :   4% 2.38M/67.0M [00:13<04:23, 246kB/s,  233kB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  49% 1.31G/2.68G [00:14<00:43, 31.3MB/s,  111MB/s  ]\n",
            "New Data Upload               :   9% 6.34M/67.0M [00:14<01:00, 1.01MB/s,  622kB/s  ]\u001b[A\n",
            "\n",
            "  ...4fmqprp/model.safetensors:  49% 1.31G/2.68G [00:11<00:12, 113MB/s]\u001b[A\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  49% 1.31G/2.68G [00:14<01:08, 19.9MB/s,  105MB/s  ]\n",
            "New Data Upload               :  11% 7.13M/67.0M [00:14<00:54, 1.10MB/s,  700kB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  49% 1.32G/2.68G [00:14<01:18, 17.3MB/s,  102MB/s  ]\n",
            "New Data Upload               :  13% 8.72M/67.0M [00:14<00:37, 1.56MB/s,  855kB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  49% 1.32G/2.68G [00:14<01:12, 18.8MB/s,  101MB/s  ]\n",
            "New Data Upload               :  20% 13.5M/67.0M [00:14<00:15, 3.49MB/s, 1.32MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  50% 1.35G/2.68G [00:15<00:27, 48.6MB/s, 99.5MB/s  ]\n",
            "New Data Upload               :  27% 18.2M/67.0M [00:15<00:08, 5.73MB/s, 1.79MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  51% 1.35G/2.68G [00:15<00:31, 42.0MB/s, 97.7MB/s  ]\n",
            "New Data Upload               :  34% 23.0M/67.0M [00:15<00:05, 8.19MB/s, 2.25MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  51% 1.36G/2.68G [00:15<00:35, 37.0MB/s, 95.1MB/s  ]\n",
            "New Data Upload               :  41% 27.7M/67.0M [00:15<00:03, 10.7MB/s, 2.72MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  51% 1.38G/2.68G [00:15<00:24, 53.9MB/s, 93.9MB/s  ]\n",
            "New Data Upload               :  48% 32.5M/67.0M [00:15<00:02, 13.2MB/s, 3.19MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  53% 1.41G/2.68G [00:15<00:14, 85.0MB/s, 94.1MB/s  ]\n",
            "New Data Upload               :  58% 38.8M/67.0M [00:15<00:01, 17.1MB/s, 3.81MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  54% 1.44G/2.68G [00:16<00:12, 98.3MB/s, 93.6MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :  55% 1.46G/2.68G [00:16<00:11, 108MB/s, 93.1MB/s  ] \n",
            "New Data Upload               :  65% 43.6M/67.0M [00:16<00:01, 15.1MB/s, 4.27MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  56% 1.50G/2.68G [00:16<00:08, 133MB/s, 93.8MB/s  ]\n",
            "New Data Upload               :  73% 49.1M/67.0M [00:16<00:01, 17.8MB/s, 4.82MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  57% 1.53G/2.68G [00:16<00:08, 132MB/s, 93.3MB/s  ]\n",
            "New Data Upload               :  82% 54.7M/67.0M [00:16<00:00, 20.2MB/s, 5.36MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  58% 1.56G/2.68G [00:16<00:07, 142MB/s, 93.5MB/s  ]\n",
            "New Data Upload               :  91% 61.0M/67.0M [00:16<00:00, 23.1MB/s, 5.98MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  59% 1.59G/2.68G [00:17<00:07, 149MB/s, 93.7MB/s  ]\n",
            "New Data Upload               :  98% 65.8M/67.0M [00:17<00:00, 23.2MB/s, 6.45MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  60% 1.62G/2.68G [00:17<00:07, 145MB/s, 93.3MB/s  ]\n",
            "New Data Upload               :  99% 66.6M/67.0M [00:17<00:00, 18.0MB/s, 6.53MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  62% 1.65G/2.68G [00:17<00:07, 144MB/s, 92.9MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :  63% 1.67G/2.68G [00:17<00:07, 141MB/s, 92.6MB/s  ]\n",
            "\n",
            "  ...4fmqprp/model.safetensors:  63% 1.67G/2.68G [00:15<00:09, 109MB/s]\u001b[A\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  63% 1.70G/2.68G [00:18<00:09, 102MB/s, 88.8MB/s  ]\n",
            "New Data Upload               :  50% 67.0M/134M [00:18<00:09, 7.37MB/s, 6.57MB/s  ] \u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  64% 1.72G/2.68G [00:18<00:09, 102MB/s, 87.7MB/s  ]\n",
            "New Data Upload               :  50% 67.6M/134M [00:18<00:10, 6.52MB/s, 6.63MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  65% 1.74G/2.68G [00:18<00:08, 111MB/s, 88.1MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :  66% 1.77G/2.68G [00:18<00:07, 117MB/s, 88.5MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :  67% 1.79G/2.68G [00:18<00:07, 113MB/s, 89.0MB/s  ]\n",
            "New Data Upload               :  51% 68.1M/134M [00:18<00:16, 4.07MB/s, 6.68MB/s  ]\u001b[A\n",
            "\n",
            "  ...4fmqprp/model.safetensors:  67% 1.79G/2.68G [00:16<00:08, 109MB/s]\u001b[A\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  68% 1.82G/2.68G [00:19<00:09, 89.2MB/s, 87.5MB/s  ]\n",
            "New Data Upload               :  52% 69.1M/134M [00:19<00:17, 3.65MB/s, 6.78MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  69% 1.85G/2.68G [00:19<00:07, 106MB/s, 88.3MB/s  ] \n",
            "New Data Upload               :  52% 70.2M/134M [00:19<00:16, 3.93MB/s, 6.88MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  69% 1.85G/2.68G [00:19<00:10, 81.7MB/s, 87.1MB/s  ]\n",
            "New Data Upload               :  54% 72.3M/134M [00:19<00:11, 5.23MB/s, 7.09MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  70% 1.88G/2.68G [00:19<00:07, 104MB/s, 88.0MB/s  ] \n",
            "New Data Upload               :  55% 74.4M/134M [00:19<00:09, 6.39MB/s, 7.29MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  72% 1.92G/2.68G [00:20<00:06, 121MB/s, 89.7MB/s  ]\n",
            "New Data Upload               :  58% 78.1M/134M [00:20<00:06, 9.24MB/s, 7.65MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  73% 1.94G/2.68G [00:20<00:05, 123MB/s, 90.7MB/s  ]\n",
            "New Data Upload               :  61% 81.2M/134M [00:20<00:04, 10.9MB/s, 7.96MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  74% 1.98G/2.68G [00:20<00:04, 143MB/s, 93.0MB/s  ]\n",
            "New Data Upload               :  61% 82.3M/134M [00:20<00:05, 9.37MB/s, 8.06MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  75% 2.01G/2.68G [00:20<00:04, 141MB/s, 93.3MB/s  ]\n",
            "New Data Upload               :  64% 85.4M/134M [00:20<00:04, 11.1MB/s, 8.37MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  76% 2.04G/2.68G [00:20<00:04, 148MB/s, 95.0MB/s  ]\n",
            "New Data Upload               :  66% 88.6M/134M [00:20<00:03, 12.5MB/s, 8.69MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  77% 2.07G/2.68G [00:21<00:04, 144MB/s, 95.3MB/s  ]\n",
            "New Data Upload               :  69% 92.2M/134M [00:21<00:02, 14.2MB/s, 9.04MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  78% 2.08G/2.68G [00:21<00:04, 124MB/s, 95.3MB/s  ]\n",
            "New Data Upload               :  48% 96.4M/201M [00:21<00:06, 16.2MB/s, 9.46MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  78% 2.10G/2.68G [00:21<00:05, 110MB/s, 94.5MB/s  ]\n",
            "New Data Upload               :  50% 101M/201M [00:21<00:05, 17.6MB/s, 9.87MB/s  ] \u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  79% 2.12G/2.68G [00:21<00:05, 103MB/s, 93.9MB/s  ]\n",
            "New Data Upload               :  52% 104M/201M [00:21<00:05, 17.8MB/s, 10.2MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  80% 2.14G/2.68G [00:21<00:04, 111MB/s, 94.9MB/s  ]\n",
            "New Data Upload               :  53% 107M/201M [00:21<00:05, 17.1MB/s, 10.5MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  81% 2.17G/2.68G [00:22<00:04, 119MB/s, 94.6MB/s  ]\n",
            "New Data Upload               :  55% 111M/201M [00:22<00:05, 17.7MB/s, 10.9MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  82% 2.19G/2.68G [00:22<00:04, 112MB/s, 93.8MB/s  ]\n",
            "New Data Upload               :  57% 115M/201M [00:22<00:04, 17.9MB/s, 11.3MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  82% 2.21G/2.68G [00:22<00:04, 107MB/s, 93.4MB/s  ]\n",
            "New Data Upload               :  59% 119M/201M [00:22<00:04, 18.8MB/s, 11.7MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  83% 2.23G/2.68G [00:22<00:03, 116MB/s, 96.1MB/s  ]\n",
            "New Data Upload               :  61% 123M/201M [00:22<00:04, 19.4MB/s, 12.1MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  84% 2.25G/2.68G [00:22<00:04, 106MB/s, 96.6MB/s  ]\n",
            "New Data Upload               :  62% 125M/201M [00:22<00:04, 16.7MB/s, 12.3MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  85% 2.28G/2.68G [00:23<00:03, 112MB/s, 96.3MB/s  ]\n",
            "New Data Upload               :  63% 127M/201M [00:23<00:05, 14.1MB/s, 12.4MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  86% 2.30G/2.68G [00:23<00:03, 109MB/s, 96.8MB/s  ]\n",
            "New Data Upload               :  66% 132M/201M [00:23<00:03, 17.8MB/s, 12.9MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  87% 2.32G/2.68G [00:23<00:03, 108MB/s, 98.9MB/s  ]\n",
            "New Data Upload               :  69% 138M/201M [00:23<00:02, 21.9MB/s, 13.5MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  87% 2.34G/2.68G [00:23<00:03, 106MB/s,  101MB/s  ]\n",
            "New Data Upload               :  71% 143M/201M [00:23<00:02, 22.5MB/s, 14.0MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  88% 2.36G/2.68G [00:23<00:02, 110MB/s,  103MB/s  ]\n",
            "New Data Upload               :  72% 145M/201M [00:23<00:03, 18.1MB/s, 14.0MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  89% 2.37G/2.68G [00:24<00:03, 95.0MB/s,  104MB/s  ]\n",
            "New Data Upload               :  73% 146M/201M [00:24<00:03, 15.0MB/s, 14.1MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  89% 2.37G/2.68G [00:24<00:04, 69.1MB/s,  104MB/s  ]\n",
            "New Data Upload               :  74% 148M/201M [00:24<00:04, 12.9MB/s, 13.9MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  89% 2.39G/2.68G [00:24<00:03, 71.9MB/s,  106MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :  89% 2.39G/2.68G [00:24<00:05, 53.7MB/s,  106MB/s  ]\n",
            "New Data Upload               :  75% 150M/201M [00:24<00:05, 9.26MB/s, 14.0MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  89% 2.39G/2.68G [00:24<00:07, 40.1MB/s,  106MB/s  ]\n",
            "New Data Upload               :  75% 152M/201M [00:24<00:05, 8.96MB/s, 14.0MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  89% 2.40G/2.68G [00:25<00:09, 30.4MB/s,  105MB/s  ]\n",
            "New Data Upload               :  76% 153M/201M [00:25<00:05, 8.69MB/s, 13.7MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  90% 2.41G/2.68G [00:25<00:05, 48.6MB/s,  105MB/s  ]\n",
            "New Data Upload               :  78% 156M/201M [00:25<00:04, 10.6MB/s, 13.5MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  91% 2.45G/2.68G [00:25<00:02, 83.3MB/s,  107MB/s  ]\n",
            "New Data Upload               :  79% 158M/201M [00:25<00:04, 9.89MB/s, 13.2MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  93% 2.48G/2.68G [00:25<00:01, 105MB/s,  110MB/s  ] \n",
            "New Data Upload               :  79% 160M/201M [00:25<00:04, 9.30MB/s, 12.9MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  94% 2.51G/2.68G [00:25<00:01, 122MB/s,  111MB/s  ]\n",
            "New Data Upload               :  80% 161M/201M [00:25<00:04, 8.89MB/s, 12.6MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  95% 2.54G/2.68G [00:26<00:01, 132MB/s,  111MB/s  ]\n",
            "\n",
            "Processing Files (0 / 1)      :  95% 2.55G/2.68G [00:26<00:01, 107MB/s,  109MB/s  ]\n",
            "New Data Upload               :  61% 163M/268M [00:26<00:14, 7.29MB/s, 12.2MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  96% 2.57G/2.68G [00:26<00:00, 110MB/s,  109MB/s  ]\n",
            "New Data Upload               :  62% 166M/270M [00:26<00:11, 9.37MB/s, 12.0MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  96% 2.58G/2.68G [00:26<00:01, 79.9MB/s,  106MB/s  ]\n",
            "New Data Upload               :  63% 168M/270M [00:26<00:10, 9.68MB/s, 11.7MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  96% 2.58G/2.68G [00:26<00:01, 58.3MB/s,  103MB/s  ]\n",
            "New Data Upload               :  63% 170M/270M [00:26<00:10, 9.21MB/s, 11.3MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  96% 2.58G/2.68G [00:27<00:02, 43.2MB/s,  100MB/s  ]\n",
            "New Data Upload               :  64% 172M/270M [00:27<00:11, 8.86MB/s, 10.8MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  96% 2.58G/2.68G [00:27<00:02, 34.2MB/s, 97.1MB/s  ]\n",
            "New Data Upload               :  65% 174M/270M [00:27<00:09, 10.1MB/s, 10.6MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  97% 2.58G/2.68G [00:27<00:03, 27.1MB/s, 94.6MB/s  ]\n",
            "New Data Upload               :  65% 176M/270M [00:27<00:09, 10.2MB/s, 10.8MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  97% 2.59G/2.68G [00:27<00:04, 21.0MB/s, 92.0MB/s  ]\n",
            "New Data Upload               :  66% 178M/270M [00:27<00:09, 9.19MB/s, 10.9MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  97% 2.59G/2.68G [00:27<00:04, 21.0MB/s, 89.7MB/s  ]\n",
            "New Data Upload               :  68% 182M/270M [00:27<00:06, 12.7MB/s, 11.3MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  97% 2.59G/2.68G [00:28<00:05, 17.1MB/s, 90.0MB/s  ]\n",
            "New Data Upload               :  68% 184M/270M [00:28<00:07, 11.3MB/s, 11.5MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  97% 2.60G/2.68G [00:28<00:04, 19.1MB/s, 88.2MB/s  ]\n",
            "New Data Upload               :  70% 188M/270M [00:28<00:05, 15.0MB/s, 11.9MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  97% 2.60G/2.68G [00:28<00:05, 15.8MB/s, 86.3MB/s  ]\n",
            "New Data Upload               :  70% 190M/270M [00:28<00:06, 12.9MB/s, 12.0MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  97% 2.60G/2.68G [00:28<00:04, 18.2MB/s, 84.2MB/s  ]\n",
            "New Data Upload               :  72% 195M/270M [00:28<00:04, 16.2MB/s, 12.5MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  97% 2.61G/2.68G [00:28<00:03, 19.9MB/s, 82.0MB/s  ]\n",
            "New Data Upload               :  74% 199M/270M [00:28<00:03, 18.5MB/s, 12.9MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  98% 2.61G/2.68G [00:29<00:02, 23.5MB/s, 80.6MB/s  ]\n",
            "New Data Upload               :  76% 206M/270M [00:29<00:02, 22.5MB/s, 13.5MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  98% 2.62G/2.68G [00:29<00:02, 22.8MB/s, 81.0MB/s  ]\n",
            "New Data Upload               :  78% 210M/270M [00:29<00:02, 22.1MB/s, 13.9MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  98% 2.62G/2.68G [00:29<00:02, 21.5MB/s, 79.1MB/s  ]\n",
            "New Data Upload               :  79% 214M/270M [00:29<00:02, 21.0MB/s, 14.2MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  98% 2.63G/2.68G [00:29<00:02, 21.4MB/s, 76.3MB/s  ]\n",
            "New Data Upload               :  81% 218M/270M [00:29<00:02, 21.1MB/s, 14.5MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  98% 2.63G/2.68G [00:29<00:01, 23.8MB/s, 76.7MB/s  ]\n",
            "New Data Upload               :  83% 224M/270M [00:29<00:01, 23.5MB/s, 14.9MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  98% 2.63G/2.68G [00:30<00:02, 19.0MB/s, 73.6MB/s  ]\n",
            "New Data Upload               :  84% 225M/270M [00:30<00:02, 18.8MB/s, 14.8MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  99% 2.64G/2.68G [00:30<00:01, 20.5MB/s, 70.9MB/s  ]\n",
            "New Data Upload               :  85% 230M/270M [00:30<00:01, 20.3MB/s, 14.9MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  99% 2.64G/2.68G [00:30<00:01, 23.9MB/s, 68.9MB/s  ]\n",
            "New Data Upload               :  88% 237M/270M [00:30<00:01, 23.8MB/s, 15.2MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  99% 2.65G/2.68G [00:30<00:01, 21.5MB/s, 65.5MB/s  ]\n",
            "New Data Upload               :  89% 240M/270M [00:30<00:01, 21.4MB/s, 15.4MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  99% 2.65G/2.68G [00:30<00:01, 19.8MB/s, 63.2MB/s  ]\n",
            "New Data Upload               :  90% 243M/270M [00:30<00:01, 19.8MB/s, 15.4MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  99% 2.65G/2.68G [00:31<00:01, 18.7MB/s, 60.2MB/s  ]\n",
            "New Data Upload               :  91% 246M/270M [00:31<00:01, 18.6MB/s, 15.4MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  99% 2.66G/2.68G [00:31<00:01, 17.8MB/s, 57.9MB/s  ]\n",
            "New Data Upload               :  93% 249M/270M [00:31<00:01, 17.8MB/s, 15.4MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  99% 2.66G/2.68G [00:31<00:00, 17.3MB/s, 56.7MB/s  ]\n",
            "New Data Upload               :  94% 253M/270M [00:31<00:00, 17.3MB/s, 15.3MB/s  ]\u001b[A\n",
            "\n",
            "  ...4fmqprp/model.safetensors:  99% 2.66G/2.68G [00:28<00:00, 91.3MB/s]\u001b[A\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      :  99% 2.66G/2.68G [00:31<00:01, 13.0MB/s, 53.8MB/s  ]\n",
            "New Data Upload               :  95% 256M/270M [00:31<00:01, 13.0MB/s, 14.8MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      : 100% 2.67G/2.68G [00:32<00:00, 14.2MB/s, 51.6MB/s  ]\n",
            "New Data Upload               :  96% 259M/270M [00:32<00:00, 14.2MB/s, 14.9MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      : 100% 2.67G/2.68G [00:32<00:00, 14.7MB/s, 49.2MB/s  ]\n",
            "New Data Upload               :  97% 263M/270M [00:32<00:00, 14.7MB/s, 14.8MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      : 100% 2.67G/2.68G [00:32<00:00, 15.0MB/s, 47.7MB/s  ]\n",
            "New Data Upload               :  99% 266M/270M [00:32<00:00, 15.0MB/s, 14.8MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      : 100% 2.68G/2.68G [00:32<00:00, 15.3MB/s, 46.1MB/s  ]\n",
            "New Data Upload               : 100% 269M/270M [00:32<00:00, 15.3MB/s, 14.7MB/s  ]\u001b[A\n",
            "\n",
            "Processing Files (0 / 1)      : 100% 2.68G/2.68G [00:32<00:00, 11.7MB/s, 43.5MB/s  ]\n",
            "New Data Upload               : 100% 269M/270M [00:32<00:00, 11.7MB/s, 14.3MB/s  ]\u001b[A\n",
            "\n",
            "  ...4fmqprp/model.safetensors: 100% 2.68G/2.68G [00:30<00:00, 87.6MB/s]\u001b[A\u001b[A\n",
            "\n",
            "  ...4fmqprp/model.safetensors: 100% 2.68G/2.68G [00:30<00:00, 87.1MB/s]\u001b[A\u001b[A\n",
            "\n",
            "  ...4fmqprp/model.safetensors: 100% 2.68G/2.68G [00:30<00:00, 86.5MB/s]\u001b[A\u001b[A\n",
            "\n",
            "  ...4fmqprp/model.safetensors: 100% 2.68G/2.68G [00:30<00:00, 85.9MB/s]\u001b[A\u001b[A\n",
            "\n",
            "  ...4fmqprp/model.safetensors: 100% 2.68G/2.68G [00:30<00:00, 85.4MB/s]\u001b[A\u001b[A\n",
            "\n",
            "  ...4fmqprp/model.safetensors: 100% 2.68G/2.68G [00:31<00:00, 84.8MB/s]\u001b[A\u001b[A\n",
            "\n",
            "Processing Files (1 / 1)      : 100% 2.68G/2.68G [00:34<00:00, 3.10MB/s, 29.8MB/s  ]\n",
            "New Data Upload               : 100% 270M/270M [00:34<00:00, 3.10MB/s, 12.1MB/s  ]\u001b[A\n",
            "\n",
            "  ...4fmqprp/model.safetensors: 100% 2.68G/2.68G [00:31<00:00, 83.8MB/s]\u001b[A\u001b[A\n",
            "\n",
            "  ...4fmqprp/model.safetensors: 100% 2.68G/2.68G [00:31<00:00, 83.2MB/s]\u001b[A\u001b[A\n",
            "\n",
            "  ...4fmqprp/model.safetensors: 100% 2.68G/2.68G [00:31<00:00, 82.7MB/s]\u001b[A\u001b[A\n",
            "\n",
            "  ...4fmqprp/model.safetensors: 100% 2.68G/2.68G [00:32<00:00, 82.2MB/s]\u001b[A\u001b[A\n",
            "\n",
            "  ...4fmqprp/model.safetensors: 100% 2.68G/2.68G [00:32<00:00, 81.7MB/s]\u001b[A\u001b[A\n",
            "\n",
            "  ...4fmqprp/model.safetensors: 100% 2.68G/2.68G [00:32<00:00, 81.2MB/s]\u001b[A\u001b[A\n",
            "\n",
            "  ...4fmqprp/model.safetensors: 100% 2.68G/2.68G [00:32<00:00, 80.7MB/s]\u001b[A\u001b[A\n",
            "\n",
            "  ...4fmqprp/model.safetensors: 100% 2.68G/2.68G [00:32<00:00, 80.2MB/s]\u001b[A\u001b[A\n",
            "\n",
            "  ...4fmqprp/model.safetensors: 100% 2.68G/2.68G [00:33<00:00, 79.7MB/s]\u001b[A\u001b[A\n",
            "\n",
            "  ...4fmqprp/model.safetensors: 100% 2.68G/2.68G [00:33<00:00, 79.2MB/s]\u001b[A\u001b[A\n",
            "\n",
            "  ...4fmqprp/model.safetensors: 100% 2.68G/2.68G [00:33<00:00, 78.8MB/s]\u001b[A\u001b[A\n",
            "\n",
            "  ...4fmqprp/model.safetensors: 100% 2.68G/2.68G [00:33<00:00, 78.4MB/s]\u001b[A\u001b[A\n",
            "\n",
            "Processing Files (1 / 1)      : 100% 2.68G/2.68G [00:36<00:00, 72.8MB/s, 10.1MB/s  ]\n",
            "New Data Upload               : 100% 270M/270M [00:36<00:00, 7.33MB/s, 10.1MB/s  ]\n",
            "  ...4fmqprp/model.safetensors: 100% 2.68G/2.68G [00:33<00:00, 78.3MB/s]\n",
            "\n",
            "README.md: 100% 690/690 [00:00<00:00, 5.23MB/s]\n",
            "Processing Files (0 / 0)      : |          |  0.00B /  0.00B            \n",
            "New Data Upload               : |          |  0.00B /  0.00B            \u001b[A\n",
            "\n",
            "  ...mp5ah361yo/tokenizer.json: 100% 11.4M/11.4M [00:00<?, ?B/s]\u001b[A\u001b[A\n",
            "\n",
            "Processing Files (1 / 1)      : 100% 11.4M/11.4M [00:00<00:00, 17.0MB/s, 28.6MB/s  ]\n",
            "\n",
            "  ...mp5ah361yo/tokenizer.json: 100% 11.4M/11.4M [00:00<?, ?B/s]\u001b[A\u001b[A\n",
            "\n",
            "  ...mp5ah361yo/tokenizer.json: 100% 11.4M/11.4M [00:00<?, ?B/s]\u001b[A\u001b[A\n",
            "\n",
            "  ...mp5ah361yo/tokenizer.json: 100% 11.4M/11.4M [00:00<?, ?B/s]\u001b[A\u001b[A\n",
            "\n",
            "  ...mp5ah361yo/tokenizer.json: 100% 11.4M/11.4M [00:00<?, ?B/s]\u001b[A\u001b[A\n",
            "\n",
            "  ...mp5ah361yo/tokenizer.json: 100% 11.4M/11.4M [00:00<?, ?B/s]\u001b[A\u001b[A\n",
            "\n",
            "  ...mp5ah361yo/tokenizer.json: 100% 11.4M/11.4M [00:01<?, ?B/s]\u001b[A\u001b[A\n",
            "\n",
            "  ...mp5ah361yo/tokenizer.json: 100% 11.4M/11.4M [00:01<?, ?B/s]\u001b[A\u001b[A\n",
            "\n",
            "  ...mp5ah361yo/tokenizer.json: 100% 11.4M/11.4M [00:01<?, ?B/s]\u001b[A\u001b[A\n",
            "\n",
            "  ...mp5ah361yo/tokenizer.json: 100% 11.4M/11.4M [00:01<?, ?B/s]\u001b[A\u001b[A\n",
            "\n",
            "Processing Files (1 / 1)      : 100% 11.4M/11.4M [00:02<00:00, 4.62MB/s, 5.19MB/s  ]\n",
            "New Data Upload               : |          |  0.00B /  0.00B,  0.00B/s  \n",
            "  ...mp5ah361yo/tokenizer.json: 100% 11.4M/11.4M [00:01<?, ?B/s]\n",
            "[OK] Model pushed -> https://huggingface.co/OGrohit/logtriage-sre-agent\n",
            "\n",
            "============================================================\n",
            "[OK] TRAINING COMPLETE\n",
            "============================================================\n",
            "  Reward curve:  reward_curve.png\n",
            "  Trained model: ./logtriage-trained\n",
            "  HF Hub:        https://huggingface.co/OGrohit/logtriage-sre-agent\n",
            "\n",
            "  Use reward_curve.png in your demo slide.\n",
            "  This image is 20% of your judging score.\n",
            "\n",
            "  0% 0/10 [06:19<?, ?it/s]\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "!python merge_curves.py"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "RMalsurHCPU3",
        "outputId": "e4e405b1-d1ff-40ad-955f-a3c976db6bc6"
      },
      "execution_count": 6,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "\n",
            "=== merge_curves.py ===\n",
            "Checkpoint dir : ./phase2_checkpoints\n",
            "Output         : reward_curve.png\n",
            "\n",
            "[OK] single_crash: loaded 25 episodes from single_crash_ep25.json\n",
            "  single_crash:\n",
            "    First 10 avg : +0.180\n",
            "    Last  10 avg : +0.145\n",
            "    Improvement  : -0.035\n",
            "[OK] cascading_failure: loaded 25 episodes from cascading_failure_ep25.json\n",
            "  cascading_failure:\n",
            "    First 10 avg : +0.090\n",
            "    Last  10 avg : +0.185\n",
            "    Improvement  : +0.095\n",
            "[OK] silent_degradation: loaded 25 episodes from silent_degradation_ep25.json\n",
            "  silent_degradation:\n",
            "    First 10 avg : +0.180\n",
            "    Last  10 avg : +0.210\n",
            "    Improvement  : +0.030\n",
            "\n",
            "[OK] Saved: reward_curve.png\n",
            "     Open with: start reward_curve.png\n",
            "     Push with: git add reward_curve.png && git commit -m 'feat: 3-task reward curve' && git push\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "  from google.colab import files\n",
        "  files.download(\"reward_curve.png\")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 17
        },
        "id": "jMipwtccCUBG",
        "outputId": "425ced85-97bf-48c8-ec8a-c85fb720258d"
      },
      "execution_count": 7,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "<IPython.core.display.Javascript object>"
            ],
            "application/javascript": [
              "\n",
              "    async function download(id, filename, size) {\n",
              "      if (!google.colab.kernel.accessAllowed) {\n",
              "        return;\n",
              "      }\n",
              "      const div = document.createElement('div');\n",
              "      const label = document.createElement('label');\n",
              "      label.textContent = `Downloading \"${filename}\": `;\n",
              "      div.appendChild(label);\n",
              "      const progress = document.createElement('progress');\n",
              "      progress.max = size;\n",
              "      div.appendChild(progress);\n",
              "      document.body.appendChild(div);\n",
              "\n",
              "      const buffers = [];\n",
              "      let downloaded = 0;\n",
              "\n",
              "      const channel = await google.colab.kernel.comms.open(id);\n",
              "      // Send a message to notify the kernel that we're ready.\n",
              "      channel.send({})\n",
              "\n",
              "      for await (const message of channel.messages) {\n",
              "        // Send a message to notify the kernel that we're ready.\n",
              "        channel.send({})\n",
              "        if (message.buffers) {\n",
              "          for (const buffer of message.buffers) {\n",
              "            buffers.push(buffer);\n",
              "            downloaded += buffer.byteLength;\n",
              "            progress.value = downloaded;\n",
              "          }\n",
              "        }\n",
              "      }\n",
              "      const blob = new Blob(buffers, {type: 'application/binary'});\n",
              "      const a = document.createElement('a');\n",
              "      a.href = window.URL.createObjectURL(blob);\n",
              "      a.download = filename;\n",
              "      div.appendChild(a);\n",
              "      a.click();\n",
              "      div.remove();\n",
              "    }\n",
              "  "
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "<IPython.core.display.Javascript object>"
            ],
            "application/javascript": [
              "download(\"download_1efc715d-f3eb-4702-86d0-e541c09e6c15\", \"reward_curve.png\", 268703)"
            ]
          },
          "metadata": {}
        }
      ]
    }
  ]
}