YUS200619 committed
Commit b28acab · verified · 1 parent: 9497e48

Upload folder using huggingface_hub

notebooks/training.ipynb CHANGED
@@ -1,293 +1,326 @@
- {
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# SWEbench-IN GRPO Training Notebook\n",
- "\n",
- "This notebook trains a Qwen2.5-3B-Instruct model using GRPO (Group Relative Policy Optimization)\n",
- "to act as an Indian SWE fixing broken Linux systems while managing stakeholder communication.\n",
- "\n",
- "**Prerequisites:**\n",
- "- A running SWEbench-IN HuggingFace Space\n",
- "- A Weights & Biases account\n",
- "- Google Colab with GPU runtime (T4 or better)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Cell 1 Install Dependencies"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "!pip install unsloth trl transformers accelerate openenv-client wandb -q"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Cell 2 — Import and Configure"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import wandb\n",
- "import random\n",
- "import re\n",
- "import json\n",
- "from unsloth import FastLanguageModel\n",
- "from trl import GRPOTrainer, GRPOConfig\n",
- "from openenv.client import Environment as OpenEnvClient\n",
- "\n",
- "wandb.init(project=\"swebench-in\", name=\"grpo-run-1\")\n",
- "\n",
- "HF_SPACE_URL = \"YOUR_HF_SPACE_URL_HERE\" # Replace before running\n",
- "env = OpenEnvClient(HF_SPACE_URL)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Cell 3 Load Model (Qwen2.5-3B-Instruct, 4-bit QLoRA via Unsloth)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "model, tokenizer = FastLanguageModel.from_pretrained(\n",
- "    model_name=\"Qwen/Qwen2.5-3B-Instruct\",\n",
- "    max_seq_length=2048,\n",
- "    dtype=None,\n",
- "    load_in_4bit=True,\n",
- ")\n",
- "model = FastLanguageModel.get_peft_model(\n",
- "    model,\n",
- "    r=16,\n",
- "    target_modules=[\"q_proj\", \"v_proj\"],\n",
- "    lora_alpha=16,\n",
- "    lora_dropout=0,\n",
- "    bias=\"none\",\n",
- "    use_gradient_checkpointing=True,\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Cell 4 — Define Rollout Function"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "def parse_action(action_text: str) -> dict:\n",
- "    \"\"\"\n",
- "    Parse the model's generated text into an action dict.\n",
- "    Expected format: ACTION_TYPE: args\n",
- "    \"\"\"\n",
- "    action_text = action_text.strip()\n",
- "    # Try to find action pattern\n",
- "    match = re.search(r'(run_command|read_file|write_file|run_tests|check_server|reply_slack|reply_email|reply_hr|close_case)[:\\s]+(.*)', action_text, re.DOTALL)\n",
- "    if match:\n",
- "        return {\"type\": match.group(1), \"args\": match.group(2).strip()}\n",
- "    # Default: treat as run_command\n",
- "    return {\"type\": \"run_command\", \"args\": action_text}\n",
- "\n",
- "\n",
- "def rollout(prompt: str, task_id: int) -> tuple[list[str], float]:\n",
- "    \"\"\"\n",
- "    Run one episode. Return (action_sequence, total_reward).\n",
- "    Uses sampling with temperature 0.7.\n",
- "    \"\"\"\n",
- "    obs = env.reset(task_id=task_id)\n",
- "    actions = []\n",
- "    total_reward = 0.0\n",
- "    done = False\n",
- "\n",
- "    while not done:\n",
- "        inputs = tokenizer(f\"Observation: {obs}\\nAction:\", return_tensors=\"pt\")\n",
- "        output = model.generate(**inputs, max_new_tokens=100, do_sample=True, temperature=0.7)\n",
- "        action_text = tokenizer.decode(output[0], skip_special_tokens=True)\n",
- "        action = parse_action(action_text)\n",
- "        obs, reward, done, info = env.step(action)\n",
- "        actions.append(action_text)\n",
- "        total_reward += reward\n",
- "\n",
- "    return actions, total_reward"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Cell 5 — Curriculum Training Loop\n",
- "\n",
- "Curriculum escalates when average reward over last 50 episodes crosses 0.6:\n",
- "- **Tier 1** (Steps 0–200): Tasks 1+2 only (easy, technical reward)\n",
- "- **Tier 2** (Steps 200–500): Add Tasks 3+4 (communication reward added)\n",
- "- **Tier 3** (Steps 500+): Add Task 5 (leave protection added)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Curriculum: tier 1 tasks first (1,2), then tier 2 (3,4), then tier 3 (5)\n",
- "CURRICULUM = {\n",
- "    \"tier1\": [1, 2],\n",
- "    \"tier2\": [3, 4],\n",
- "    \"tier3\": [5],\n",
- "}\n",
- "\n",
- "current_tier = \"tier1\"\n",
- "tier_rewards = []\n",
- "\n",
- "for step in range(700):\n",
- "    task_id = random.choice(CURRICULUM[current_tier])\n",
- "    actions, reward = rollout(\"\", task_id)\n",
- "\n",
- "    # Log to wandb\n",
- "    wandb.log({\n",
- "        \"reward/total\": reward,\n",
- "        \"training_step\": step,\n",
- "        \"task_id\": task_id,\n",
- "        \"current_tier\": current_tier,\n",
- "        \"num_actions\": len(actions),\n",
- "    })\n",
- "\n",
- "    tier_rewards.append(reward)\n",
- "\n",
- "    # Escalate curriculum\n",
- "    if len(tier_rewards) >= 50 and sum(tier_rewards[-50:]) / 50 >= 0.6:\n",
- "        if current_tier == \"tier1\":\n",
- "            current_tier = \"tier2\"\n",
- "            tier_rewards = []\n",
- "            print(f\"Step {step}: Escalating to tier 2\")\n",
- "        elif current_tier == \"tier2\":\n",
- "            current_tier = \"tier3\"\n",
- "            tier_rewards = []\n",
- "            print(f\"Step {step}: Escalating to tier 3\")\n",
- "\n",
- "    if step % 50 == 0:\n",
- "        avg = sum(tier_rewards[-50:]) / max(len(tier_rewards[-50:]), 1)\n",
- "        print(f\"Step {step} | Tier: {current_tier} | Avg reward (last 50): {avg:.3f}\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Cell 6 — Save Model Correctly\n",
- "\n",
- "**CRITICAL:** Do NOT merge LoRA into 4-bit base model — this damages quality.\n",
- "Use `save_pretrained` with `method=\"lora\"`."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# CRITICAL: Do NOT merge LoRA into 4-bit base. Use save_pretrained with method=\"lora\"\n",
- "model.save_pretrained(\"swebench-in-lora\")\n",
- "tokenizer.save_pretrained(\"swebench-in-lora\")\n",
- "# Push to hub\n",
- "model.push_to_hub(\"YOUR_HF_USERNAME/swebench-in-lora\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Cell 7 — Generate and Commit Training Plots\n",
- "\n",
- "Both plots must be committed as `.png` files to the repo.\n",
- "Wandb-only links do not count for the automated validation check."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import matplotlib.pyplot as plt\n",
- "import os\n",
- "\n",
- "# Pull run history from wandb\n",
- "run = wandb.run\n",
- "history = run.history()\n",
- "\n",
- "os.makedirs(\"plots\", exist_ok=True)\n",
- "\n",
- "# --- Reward Curve ---\n",
- "fig, ax = plt.subplots(figsize=(10, 5))\n",
- "ax.plot(history[\"training_step\"], history[\"reward/total\"],\n",
- "        label=\"Trained Agent\", color=\"steelblue\")\n",
- "ax.axhline(y=-0.4, color=\"orange\", linestyle=\"--\",\n",
- "           label=\"Untrained Baseline (-0.4)\")\n",
- "ax.set_xlabel(\"Training Step\")\n",
- "ax.set_ylabel(\"Episode Reward\")\n",
- "ax.set_title(\"SWEbench-IN: Training Reward Curve\")\n",
- "ax.legend()\n",
- "ax.grid(True, alpha=0.3)\n",
- "plt.tight_layout()\n",
- "plt.savefig(\"plots/reward_curve.png\", dpi=150)\n",
- "plt.show()\n",
- "print(\"plots/reward_curve.png saved. Commit it to your repo now.\")\n",
- "\n",
- "# --- Loss Curve ---\n",
- "fig, ax = plt.subplots(figsize=(10, 5))\n",
- "if \"loss\" in history.columns:\n",
- "    ax.plot(history[\"training_step\"], history[\"loss\"],\n",
- "            label=\"Policy Loss\", color=\"crimson\")\n",
- "ax.set_xlabel(\"Training Step\")\n",
- "ax.set_ylabel(\"Loss\")\n",
- "ax.set_title(\"SWEbench-IN: Policy Loss Curve\")\n",
- "ax.legend()\n",
- "ax.grid(True, alpha=0.3)\n",
- "plt.tight_layout()\n",
- "plt.savefig(\"plots/loss_curve.png\", dpi=150)\n",
- "plt.show()\n",
- "print(\"plots/loss_curve.png saved. Commit it to your repo now.\")"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "name": "python",
- "version": "3.11.0"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 4
- }
+ {
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# SWEbench-IN \u2014 GRPO Training Notebook\n",
+ "\n",
+ "This notebook trains a Qwen2.5-3B-Instruct model using GRPO (Group Relative Policy Optimization)\n",
+ "to act as an Indian SWE \u2014 fixing broken Linux systems while managing stakeholder communication.\n",
+ "\n",
+ "**Prerequisites:**\n",
+ "- A running SWEbench-IN HuggingFace Space\n",
+ "- A Weights & Biases account\n",
+ "- Google Colab with GPU runtime (T4 or better)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Cell 1 \u2014 Install Dependencies"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[31mERROR: Ignored the following versions that require a different python version: 2025.3.4 Requires-Python <=3.12,>=3.9\u001b[0m\u001b[31m\n",
+ "\u001b[0m\u001b[31mERROR: Could not find a version that satisfies the requirement openenv-client (from versions: none)\u001b[0m\u001b[31m\n",
+ "\u001b[0m\u001b[31mERROR: No matching distribution found for openenv-client\u001b[0m\u001b[31m\n",
+ "\u001b[0m"
+ ]
+ }
+ ],
+ "source": [
+ "!pip install unsloth trl transformers accelerate openenv-core[core]>=0.2.2 wandb -q"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Cell 2 \u2014 Import and Configure"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "ModuleNotFoundError",
+ "evalue": "No module named 'unsloth'",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
+ "\u001b[0;32m/tmp/ipykernel_1776/4025435489.py\u001b[0m in \u001b[0;36m<cell line: 0>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mre\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mjson\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0munsloth\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mFastLanguageModel\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mtrl\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mGRPOTrainer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mGRPOConfig\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mopenenv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclient\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mEnvironment\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mOpenEnvClient\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'unsloth'",
+ "",
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0;32m\nNOTE: If your import is failing due to a missing package, you can\nmanually install dependencies using either !pip or !apt.\n\nTo view examples of installing some common dependencies, click the\n\"Open Examples\" button below.\n\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n"
+ ]
+ }
+ ],
+ "source": [
+ "import wandb\n",
+ "import random\n",
+ "import re\n",
+ "import json\n",
+ "from unsloth import FastLanguageModel\n",
+ "from trl import GRPOTrainer, GRPOConfig\n",
+ "from openenv.client import Environment as OpenEnvClient\n",
+ "\n",
+ "wandb.init(project=\"swebench-in\", name=\"grpo-run-1\")\n",
+ "\n",
+ "HF_SPACE_URL = \"YOUR_HF_SPACE_URL_HERE\" # Replace before running\n",
+ "env = OpenEnvClient(HF_SPACE_URL)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Cell 3 \u2014 Load Model (Qwen2.5-3B-Instruct, 4-bit QLoRA via Unsloth)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "model, tokenizer = FastLanguageModel.from_pretrained(\n",
+ "    model_name=\"Qwen/Qwen2.5-3B-Instruct\",\n",
+ "    max_seq_length=2048,\n",
+ "    dtype=None,\n",
+ "    load_in_4bit=True,\n",
+ ")\n",
+ "model = FastLanguageModel.get_peft_model(\n",
+ "    model,\n",
+ "    r=16,\n",
+ "    target_modules=[\"q_proj\", \"v_proj\"],\n",
+ "    lora_alpha=16,\n",
+ "    lora_dropout=0,\n",
+ "    bias=\"none\",\n",
+ "    use_gradient_checkpointing=True,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Cell 4 \u2014 Define Rollout Function"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def parse_action(action_text: str) -> dict:\n",
+ "    \"\"\"\n",
+ "    Parse the model's generated text into an action dict.\n",
+ "    Expected format: ACTION_TYPE: args\n",
+ "    \"\"\"\n",
+ "    action_text = action_text.strip()\n",
+ "    # Try to find action pattern\n",
+ "    match = re.search(r'(run_command|read_file|write_file|run_tests|check_server|reply_slack|reply_email|reply_hr|close_case)[:\\s]+(.*)', action_text, re.DOTALL)\n",
+ "    if match:\n",
+ "        return {\"type\": match.group(1), \"args\": match.group(2).strip()}\n",
+ "    # Default: treat as run_command\n",
+ "    return {\"type\": \"run_command\", \"args\": action_text}\n",
+ "\n",
+ "\n",
+ "def rollout(prompt: str, task_id: int) -> tuple[list[str], float]:\n",
+ "    \"\"\"\n",
+ "    Run one episode. Return (action_sequence, total_reward).\n",
+ "    Uses sampling with temperature 0.7.\n",
+ "    \"\"\"\n",
+ "    obs = env.reset(task_id=task_id)\n",
+ "    actions = []\n",
+ "    total_reward = 0.0\n",
+ "    done = False\n",
+ "\n",
+ "    while not done:\n",
+ " inputs = tokenizer(f\"Observation: {obs}\\nAction:\", return_tensors=\"pt\")\n",
155
+ " output = model.generate(**inputs, max_new_tokens=100, do_sample=True, temperature=0.7)\n",
156
+ " action_text = tokenizer.decode(output[0], skip_special_tokens=True)\n",
157
+ " action = parse_action(action_text)\n",
158
+ " obs, reward, done, info = env.step(action)\n",
159
+ " actions.append(action_text)\n",
160
+ " total_reward += reward\n",
161
+ "\n",
162
+ " return actions, total_reward"
163
+ ]
164
+ },
165
+ {
166
+ "cell_type": "markdown",
167
+ "metadata": {},
168
+ "source": [
169
+ "## Cell 5 \u2014 Curriculum Training Loop\n",
170
+ "\n",
171
+ "Curriculum escalates when average reward over last 50 episodes crosses 0.6:\n",
172
+ "- **Tier 1** (Steps 0\u2013200): Tasks 1+2 only (easy, technical reward)\n",
173
+ "- **Tier 2** (Steps 200\u2013500): Add Tasks 3+4 (communication reward added)\n",
174
+ "- **Tier 3** (Steps 500+): Add Task 5 (leave protection added)"
175
+ ]
176
+ },
177
+ {
178
+ "cell_type": "code",
179
+ "execution_count": null,
180
+ "metadata": {},
181
+ "outputs": [],
182
+ "source": [
183
+ "# Curriculum: tier 1 tasks first (1,2), then tier 2 (3,4), then tier 3 (5)\n",
184
+ "CURRICULUM = {\n",
185
+ " \"tier1\": [1, 2],\n",
186
+ " \"tier2\": [3, 4],\n",
187
+ " \"tier3\": [5],\n",
188
+ "}\n",
189
+ "\n",
190
+ "current_tier = \"tier1\"\n",
191
+ "tier_rewards = []\n",
192
+ "\n",
193
+ "for step in range(700):\n",
194
+ " task_id = random.choice(CURRICULUM[current_tier])\n",
195
+ " actions, reward = rollout(\"\", task_id)\n",
196
+ "\n",
197
+ " # Log to wandb\n",
198
+ " wandb.log({\n",
199
+ " \"reward/total\": reward,\n",
200
+ " \"training_step\": step,\n",
201
+ " \"task_id\": task_id,\n",
202
+ " \"current_tier\": current_tier,\n",
203
+ " \"num_actions\": len(actions),\n",
204
+ " })\n",
205
+ "\n",
206
+ " tier_rewards.append(reward)\n",
207
+ "\n",
208
+ " # Escalate curriculum\n",
209
+ " if len(tier_rewards) >= 50 and sum(tier_rewards[-50:]) / 50 >= 0.6:\n",
210
+ " if current_tier == \"tier1\":\n",
211
+ " current_tier = \"tier2\"\n",
212
+ " tier_rewards = []\n",
213
+ " print(f\"Step {step}: Escalating to tier 2\")\n",
214
+ " elif current_tier == \"tier2\":\n",
215
+ " current_tier = \"tier3\"\n",
216
+ " tier_rewards = []\n",
217
+ " print(f\"Step {step}: Escalating to tier 3\")\n",
218
+ "\n",
219
+ " if step % 50 == 0:\n",
220
+ " avg = sum(tier_rewards[-50:]) / max(len(tier_rewards[-50:]), 1)\n",
221
+ " print(f\"Step {step} | Tier: {current_tier} | Avg reward (last 50): {avg:.3f}\")"
222
+ ]
223
+ },
224
+ {
225
+ "cell_type": "markdown",
226
+ "metadata": {},
227
+ "source": [
228
+ "## Cell 6 \u2014 Save Model Correctly\n",
229
+ "\n",
230
+ "**CRITICAL:** Do NOT merge LoRA into 4-bit base model \u2014 this damages quality.\n",
231
+ "Use `save_pretrained` with `method=\"lora\"`."
232
+ ]
233
+ },
234
+ {
235
+ "cell_type": "code",
236
+ "execution_count": null,
237
+ "metadata": {},
238
+ "outputs": [],
239
+ "source": [
240
+ "# CRITICAL: Do NOT merge LoRA into 4-bit base. Use save_pretrained with method=\"lora\"\n",
241
+ "model.save_pretrained(\"swebench-in-lora\")\n",
242
+ "tokenizer.save_pretrained(\"swebench-in-lora\")\n",
243
+ "# Push to hub\n",
244
+ "model.push_to_hub(\"YOUR_HF_USERNAME/swebench-in-lora\")"
245
+ ]
246
+ },
247
+ {
248
+ "cell_type": "markdown",
249
+ "metadata": {},
250
+ "source": [
251
+ "## Cell 7 \u2014 Generate and Commit Training Plots\n",
252
+ "\n",
253
+ "Both plots must be committed as `.png` files to the repo.\n",
254
+ "Wandb-only links do not count for the automated validation check."
255
+ ]
256
+ },
257
+ {
258
+ "cell_type": "code",
259
+ "execution_count": null,
260
+ "metadata": {},
261
+ "outputs": [],
262
+ "source": [
263
+ "import matplotlib.pyplot as plt\n",
264
+ "import os\n",
265
+ "\n",
266
+ "# Pull run history from wandb\n",
267
+ "run = wandb.run\n",
268
+ "history = run.history()\n",
269
+ "\n",
270
+ "os.makedirs(\"plots\", exist_ok=True)\n",
271
+ "\n",
272
+ "# --- Reward Curve ---\n",
273
+ "fig, ax = plt.subplots(figsize=(10, 5))\n",
274
+ "ax.plot(history[\"training_step\"], history[\"reward/total\"],\n",
275
+ " label=\"Trained Agent\", color=\"steelblue\")\n",
276
+ "ax.axhline(y=-0.4, color=\"orange\", linestyle=\"--\",\n",
277
+ " label=\"Untrained Baseline (-0.4)\")\n",
278
+ "ax.set_xlabel(\"Training Step\")\n",
279
+ "ax.set_ylabel(\"Episode Reward\")\n",
280
+ "ax.set_title(\"SWEbench-IN: Training Reward Curve\")\n",
281
+ "ax.legend()\n",
282
+ "ax.grid(True, alpha=0.3)\n",
283
+ "plt.tight_layout()\n",
284
+ "plt.savefig(\"plots/reward_curve.png\", dpi=150)\n",
285
+ "plt.show()\n",
286
+ "print(\"plots/reward_curve.png saved. Commit it to your repo now.\")\n",
287
+ "\n",
288
+ "# --- Loss Curve ---\n",
289
+ "fig, ax = plt.subplots(figsize=(10, 5))\n",
290
+ "if \"loss\" in history.columns:\n",
291
+ " ax.plot(history[\"training_step\"], history[\"loss\"],\n",
292
+ " label=\"Policy Loss\", color=\"crimson\")\n",
293
+ "ax.set_xlabel(\"Training Step\")\n",
294
+ "ax.set_ylabel(\"Loss\")\n",
295
+ "ax.set_title(\"SWEbench-IN: Policy Loss Curve\")\n",
296
+ "ax.legend()\n",
297
+ "ax.grid(True, alpha=0.3)\n",
298
+ "plt.tight_layout()\n",
299
+ "plt.savefig(\"plots/loss_curve.png\", dpi=150)\n",
300
+ "plt.show()\n",
301
+ "print(\"plots/loss_curve.png saved. Commit it to your repo now.\")"
302
+ ]
303
+ }
304
+ ],
305
+ "metadata": {
306
+ "kernelspec": {
307
+ "display_name": "Python 3 (ipykernel)",
308
+ "language": "python",
309
+ "name": "python3"
310
+ },
311
+ "language_info": {
312
+ "codemirror_mode": {
313
+ "name": "ipython",
314
+ "version": 3
315
+ },
316
+ "file_extension": ".py",
317
+ "mimetype": "text/x-python",
318
+ "name": "python",
319
+ "nbconvert_exporter": "python",
320
+ "pygments_lexer": "ipython3",
321
+ "version": "3.12.13"
322
+ }
323
+ },
324
+ "nbformat": 4,
325
+ "nbformat_minor": 4
326
+ }
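
The notebook imports `GRPOTrainer` and `GRPOConfig`, but the loop in Cell 5 only samples rollouts and logs rewards; no policy update ever runs. Below is a minimal sketch of how the environment reward could be wired into TRL's `GRPOTrainer`, assuming TRL's standard GRPO API; the `reward_fn`, the prompt dataset, and the single-step scoring are illustrative additions, not part of this commit.

```python
# Sketch only (not in this commit): connect the environment reward to TRL's
# GRPOTrainer. Assumes `env`, `model`, `tokenizer`, and `parse_action` from
# the notebook cells above, and TRL's GRPO API with callable reward functions.
from datasets import Dataset
from trl import GRPOConfig, GRPOTrainer

def reward_fn(prompts, completions, task_id, **kwargs):
    """Score each sampled completion by replaying its first parsed action.

    TRL passes extra dataset columns (here `task_id`) to reward functions
    as keyword arguments, one list entry per completion.
    """
    rewards = []
    for completion, tid in zip(completions, task_id):
        env.reset(task_id=tid)
        _, reward, _, _ = env.step(parse_action(completion))
        rewards.append(float(reward))
    return rewards

# One prompt per tier-1 task; GRPO samples `num_generations` completions
# per prompt and normalizes rewards within each group.
train_dataset = Dataset.from_list(
    [{"prompt": f"Observation: task {tid} start\nAction:", "task_id": tid}
     for tid in (1, 2)]
)

trainer = GRPOTrainer(
    model=model,
    reward_funcs=reward_fn,
    args=GRPOConfig(
        output_dir="grpo-ckpts",
        num_generations=4,            # group size for the relative advantage
        per_device_train_batch_size=4,
        max_completion_length=100,
        logging_steps=10,
    ),
    train_dataset=train_dataset,
    processing_class=tokenizer,
)
trainer.train()
```

This scores only the first action of each completion; proper credit assignment over a full episode would require folding the multi-step rollout loop from Cell 4 into the reward function.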
server/app.py CHANGED
@@ -26,12 +26,12 @@ except Exception as e: # pragma: no cover
         "openenv is required for the web interface. Install dependencies with '\n uv sync\n'"
     ) from e
 
-try:
-    from ..models import SWEbenchINAction, SWEbenchINObservation
-    from .swebench_in_environment import SWEbenchINEnvironment
-except ModuleNotFoundError:
-    from models import SWEbenchINAction, SWEbenchINObservation
-    from server.swebench_in_environment import SWEbenchINEnvironment
+import sys
+import os
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from models import SWEbenchINAction, SWEbenchINObservation
+from server.swebench_in_environment import SWEbenchINEnvironment
 
 
 # Create the app with web interface and README integration
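
The change replaces the fragile try/except dual-import fallback with an explicit `sys.path` prepend of the repo root, so `models` and `server.*` resolve the same way whether `app.py` runs as a script or inside the Space container. An equivalent formulation using `pathlib` is sketched below for comparison; it is illustrative only and not what the commit ships.

```python
# Equivalent sketch of the sys.path fix using pathlib (illustrative only).
import sys
from pathlib import Path

REPO_ROOT = Path(__file__).resolve().parent.parent  # server/ -> repo root
if str(REPO_ROOT) not in sys.path:
    sys.path.insert(0, str(REPO_ROOT))

from models import SWEbenchINAction, SWEbenchINObservation
from server.swebench_in_environment import SWEbenchINEnvironment
```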
server/swebench_in_environment.py CHANGED
@@ -10,10 +10,7 @@ from uuid import uuid4
 from openenv.core.env_server.interfaces import Environment
 from openenv.core.env_server.types import State
 
-try:
-    from ..models import SWEbenchINAction, SWEbenchINObservation
-except ImportError:
-    from models import SWEbenchINAction, SWEbenchINObservation
+from models import SWEbenchINAction, SWEbenchINObservation
 
 import sys
 import os
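
With both server modules importing `models` absolutely, the environment can be exercised end to end from the notebook's client. A quick smoke test against a deployed Space, reusing the same client import and call shapes the notebook uses (the URL is a placeholder to replace):

```python
# Smoke test (sketch): one reset/step round trip against the Space,
# using the same client import and call shapes as the notebook.
from openenv.client import Environment as OpenEnvClient

env = OpenEnvClient("YOUR_HF_SPACE_URL_HERE")  # replace with your Space URL

obs = env.reset(task_id=1)
print("initial observation:", obs)

# The notebook's parse_action default: bare text becomes a run_command action.
obs, reward, done, info = env.step({"type": "run_command", "args": "uname -a"})
print("reward:", reward, "done:", done)
```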