{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# GRPO Training on API Debug Environment\n",
    "\n",
    "Trains Qwen 0.5B to debug malformed API requests using reward signals from the live HF Space.\n",
    "Includes 6-level curriculum learning: easy -> classify -> medium -> headers -> response -> hard.\n",
    "\n",
    "**Requirements**: Free Colab T4 GPU"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cell 1: Install dependencies (vllm required for fast generation)\n",
    "# %pip (not !pip) installs into the running kernel's environment.\n",
    "%pip install -q \"trl[vllm]>=0.26.0\" transformers torch datasets openenv-core openai"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cell 2: Clone repo\n",
    "!git clone https://github.com/Avi-chauhan/api-debug-env.git\n",
    "%cd api-debug-env"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cell 3: Verify GPU\n",
    "import torch\n",
    "print(f\"GPU: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'None'}\")\n",
    "print(f\"CUDA: {torch.cuda.is_available()}\")\n",
    "print(f\"BF16: {torch.cuda.is_bf16_supported() if torch.cuda.is_available() else False}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cell 4: Quick environment connectivity test\n",
    "import asyncio\n",
    "import sys\n",
    "sys.path.insert(0, '.')\n",
    "from client import APIDebugEnv\n",
    "from models import APIDebugAction\n",
    "\n",
    "async def test_env():\n",
    "    env = APIDebugEnv(base_url='https://avichauhan-api-debug-env.hf.space')\n",
    "    result = await env.reset(task='easy')\n",
    "    print(f'Connected!\\nTask: {result.observation.task}')\n",
    "    print(f'API: {result.observation.api_name}')\n",
    "    print(f'Error count: {result.observation.error_count}')\n",
    "    result = await env.step(APIDebugAction(error_type='missing_required_field', affected_fields=['email']))\n",
    "    print(f'Reward: {result.reward}, Done: {result.done}')\n",
    "    await env.close()\n",
    "\n",
    "await test_env()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cell 5: Run GRPO training\n",
    "# This will take ~30-60 minutes on a T4 GPU\n",
    "!python training/train.py"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cell 6: Upload trained model to HuggingFace\n",
    "from google.colab import userdata\n",
    "from huggingface_hub import HfApi\n",
    "\n",
    "token = userdata.get('HF_TOKEN')\n",
    "api = HfApi(token=token)\n",
    "\n",
    "# Create repo first (in case it doesn't exist)\n",
    "api.create_repo('avichauhan/api-debug-grpo-qwen-0.5b', exist_ok=True)\n",
    "\n",
    "api.upload_folder(\n",
    "    folder_path='./outputs/api-debug-grpo',\n",
    "    repo_id='avichauhan/api-debug-grpo-qwen-0.5b',\n",
    "    repo_type='model',\n",
    "    create_pr=False,\n",
    ")\n",
    "print('Model uploaded to: https://huggingface.co/avichauhan/api-debug-grpo-qwen-0.5b')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.12.0"
  },
  "accelerator": "GPU",
  "colab": {
   "gpuType": "T4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}