{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "salespath-intro",
   "metadata": {},
   "source": [
    "# SalesPath GRPO training\n",
    "\n",
    "Installs the dependencies, clones `salespath_env`, installs it in editable mode, and launches `training.grpo_train` in GRPO mode.\n",
    "\n",
    "- All tunable paths and the model name live in the `CONFIG` block of the first code cell.\n",
    "- The `/content/...` paths suggest a Google Colab runtime; adjust `REPO_ROOT` and `OUTPUT_DIR` when running elsewhere.\n",
    "- Set the `HF_TOKEN` environment variable before running to log in to the Hugging Face Hub."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8a5758a2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ============================================================\n",
    "# CELL 1 — Install + Clone\n",
    "# ============================================================\n",
    "import os, shlex, subprocess, sys, time\n",
    "from pathlib import Path\n",
    "\n",
    "# ---------- CONFIG ----------\n",
    "REPO_URL = \"https://github.com/Imsachin010/salespath_env.git\"\n",
    "REPO_ROOT = \"/content/salespath_env\"  # clone target; becomes the working directory\n",
    "MODEL_NAME = \"unsloth/Qwen2.5-7B-Instruct\"  # 7B Model for final submission\n",
    "ENV_URL = \"http://127.0.0.1:8000\"\n",
    "OUTPUT_DIR = \"/content/salespath_out\"\n",
    "# -----------------------------------------\n",
    "\n",
    "# Shell-quoted path of the kernel's own interpreter, so every pip/python\n",
    "# subprocess targets the environment this notebook is running in rather\n",
    "# than whatever `pip` / `python` happens to be first on PATH.\n",
    "PY = shlex.quote(sys.executable)\n",
    "\n",
    "\n",
    "def run(cmd, check=True, cwd=None):\n",
    "    \"\"\"Run `cmd` through the shell and echo its captured stdout/stderr.\n",
    "\n",
    "    Args:\n",
    "        cmd: Shell command line (executed with shell=True).\n",
    "        check: When true, a non-zero exit status raises RuntimeError.\n",
    "        cwd: Optional working directory for the command.\n",
    "\n",
    "    Returns:\n",
    "        The subprocess.CompletedProcess of the finished command.\n",
    "\n",
    "    Raises:\n",
    "        RuntimeError: if `check` is true and the command exits non-zero.\n",
    "    \"\"\"\n",
    "    print(f\"\\n$ {cmd}\")\n",
    "    # Output is captured, so it is printed only after the command finishes.\n",
    "    r = subprocess.run(cmd, shell=True, text=True, capture_output=True, cwd=cwd)\n",
    "    if r.stdout:\n",
    "        print(r.stdout.strip())\n",
    "    if r.stderr:\n",
    "        print(r.stderr.strip())\n",
    "    if check and r.returncode != 0:\n",
    "        raise RuntimeError(f\"Command failed ({r.returncode}): {cmd}\")\n",
    "    return r\n",
    "\n",
    "\n",
    "!nvidia-smi\n",
    "print(\"Python:\", sys.version)\n",
    "\n",
    "# Install dependencies\n",
    "# NOTE(review): versions are unpinned, so a fresh run may resolve different\n",
    "# releases than the ones this notebook was developed against; consider pinning.\n",
    "%pip install -q -U pip\n",
    "!{PY} -m pip uninstall -y openenv 2>/dev/null || true\n",
    "%pip install -q fastapi uvicorn pydantic httpx openenv-core torch transformers trl unsloth datasets pyarrow huggingface_hub matplotlib\n",
    "\n",
    "# Clone repo (skipped when the target directory already exists)\n",
    "if not Path(REPO_ROOT).exists():\n",
    "    run(f\"git clone {shlex.quote(REPO_URL)} {shlex.quote(REPO_ROOT)}\")\n",
    "else:\n",
    "    print(\"Repo already cloned.\")\n",
    "\n",
    "# Later cells run git and `python -m training.grpo_train` relative to the repo root.\n",
    "os.chdir(REPO_ROOT)\n",
    "print(\"Working dir:\", os.getcwd())\n",
    "\n",
    "# Install package in editable mode, then smoke-test the imports in a fresh interpreter\n",
    "run(f\"{PY} -m pip install -q -e .\")\n",
    "run(f\"{PY} -c \\\"import salespath_env; print('salespath_env import OK')\\\"\")\n",
    "run(f\"{PY} -c \\\"import openenv.core; print('openenv.core import OK')\\\"\")\n",
    "\n",
    "# HF Login (token is read from the environment, never hard-coded)\n",
    "hf_token = os.environ.get(\"HF_TOKEN\")\n",
    "if hf_token:\n",
    "    # Imported here because huggingface_hub is installed earlier in this same cell.\n",
    "    from huggingface_hub import login\n",
    "    login(token=hf_token)\n",
    "    print(\"HF login OK\")\n",
    "else:\n",
    "    print(\"HF_TOKEN not set.\")\n",
    "\n",
    "print(\"\\n✅ Setup complete.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "salespath-sync",
   "metadata": {},
   "source": [
    "## Sync repository\n",
    "\n",
    "Brings an existing clone up to date with `origin/main`. The package is installed in editable mode, so pulled changes take effect without reinstalling."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "48a62b2d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pull the latest commits from origin/main into the clone.\n",
    "# `-C` makes the command independent of the current working directory.\n",
    "!git -C {REPO_ROOT} pull origin main"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "salespath-train",
   "metadata": {},
   "source": [
    "## GRPO training\n",
    "\n",
    "Runs `training.grpo_train` from the repository root. The model name and output directory come from the `CONFIG` block in the setup cell; the remaining hyperparameters are set inline below."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "97e47c56",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Requires the setup cell to have run: uses PY, MODEL_NAME, OUTPUT_DIR and the\n",
    "# working directory it set (the module is resolved relative to the repo root).\n",
    "# NOTE(review): older PyTorch releases read PYTORCH_CUDA_ALLOC_CONF rather than\n",
    "# PYTORCH_ALLOC_CONF; confirm which name the installed torch version honours.\n",
    "!PYTORCH_ALLOC_CONF=expandable_segments:True \\\n",
    "{PY} -m training.grpo_train \\\n",
    "    --mode grpo \\\n",
    "    --model-name {MODEL_NAME} \\\n",
    "    --grpo-steps 150 \\\n",
    "    --grpo-dataset-size 128 \\\n",
    "    --num-generations 2 \\\n",
    "    --max-completion-length 128 \\\n",
    "    --per-device-train-batch-size 2 \\\n",
    "    --gradient-accumulation-steps 8 \\\n",
    "    --output-dir {OUTPUT_DIR} \\\n",
    "    --logging-steps 10\n"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}