{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# GRPO Training on API Debug Environment\n",
    "\n",
    "Trains Qwen 0.5B to debug malformed API requests using reward signals from the live HF Space.\n",
    "Includes 6-level curriculum learning: easy -> classify -> medium -> headers -> response -> hard.\n",
    "\n",
    "**Requirements**: Free Colab T4 GPU"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cell 1: Install dependencies (vllm required for fast generation)\n",
    "# %pip (not !pip) installs into the running kernel's environment.\n",
    "%pip install -q \"trl[vllm]>=0.26.0\" transformers torch datasets openenv-core openai"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cell 2: Clone repo\n",
    "!git clone https://github.com/Avi-chauhan/api-debug-env.git\n",
    "%cd api-debug-env"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cell 3: Verify GPU\n",
    "import torch\n",
    "print(f\"GPU: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'None'}\")\n",
    "print(f\"CUDA: {torch.cuda.is_available()}\")\n",
    "print(f\"BF16: {torch.cuda.is_bf16_supported() if torch.cuda.is_available() else False}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cell 4: Quick environment connectivity test\n",
    "import asyncio\n",
    "import sys\n",
    "sys.path.insert(0, '.')\n",
    "from client import APIDebugEnv\n",
    "from models import APIDebugAction\n",
    "\n",
    "async def test_env():\n",
    "    env = APIDebugEnv(base_url='https://avichauhan-api-debug-env.hf.space')\n",
    "    result = await env.reset(task='easy')\n",
    "    print(f'Connected!\\nTask: {result.observation.task}')\n",
    "    print(f'API: {result.observation.api_name}')\n",
    "    print(f'Error count: {result.observation.error_count}')\n",
    "    result = await env.step(APIDebugAction(error_type='missing_required_field', affected_fields=['email']))\n",
    "    print(f'Reward: {result.reward}, Done: {result.done}')\n",
    "    await env.close()\n",
    "\n",
    "await test_env()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cell 5: Run GRPO training\n",
    "# This will take ~30-60 minutes on a T4 GPU\n",
    "!python training/train.py"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cell 6: Upload trained model to HuggingFace\n",
    "from google.colab import userdata\n",
    "from huggingface_hub import HfApi\n",
    "\n",
    "token = userdata.get('HF_TOKEN')\n",
    "api = HfApi(token=token)\n",
    "\n",
    "# Create repo first (in case it doesn't exist)\n",
    "api.create_repo('avichauhan/api-debug-grpo-qwen-0.5b', exist_ok=True)\n",
    "\n",
    "api.upload_folder(\n",
    "    folder_path='./outputs/api-debug-grpo',\n",
    "    repo_id='avichauhan/api-debug-grpo-qwen-0.5b',\n",
    "    repo_type='model',\n",
    "    create_pr=False,\n",
    ")\n",
    "print('Model uploaded to: https://huggingface.co/avichauhan/api-debug-grpo-qwen-0.5b')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.12.0"
  },
  "accelerator": "GPU",
  "colab": {
   "gpuType": "T4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}