Spaces:

Vikaspandey582003
/

echo-ultimate

Sleeping

App Files Community

Vikaspandey582003 committed on 29 days ago

Commit

ce66956

verified ·

1 Parent(s): 1bacd77

feat: A10G-optimised GRPO config — 256 tokens, bf16, 300 samples

Browse files

Files changed (1) hide show

ECHO_Training.ipynb +31 -14

ECHO_Training.ipynb CHANGED Viewed

@@ -273,28 +273,36 @@
         "print(\"   BAD : all rewards exactly -0.5 → stop & report\")\n",
         "print(\"=\" * 50)"
       ],
-      "id": "081d73fd",
       "execution_count": null,
-      "outputs": []
     },
     {
       "cell_type": "code",
       "metadata": {},
       "source": [
-        "# Configure GRPO training\n",
         "training_args = GRPOConfig(\n",
         "    output_dir=\"echo_grpo_output\",\n",
-        "    num_train_epochs=3,\n",
         "    per_device_train_batch_size=1,\n",
-        "    gradient_accumulation_steps=8,\n",
         "    learning_rate=2e-5,\n",
-        "    warmup_steps=50,\n",
-        "    logging_steps=10,\n",
-        "    save_steps=100,\n",
-        "    fp16=True,\n",
         "    report_to=\"none\",\n",
-        "    max_completion_length=512,\n",
-        "    num_generations=4,  # GRPO group size\n",
         "    temperature=0.8,\n",
         ")\n",
         "\n",
@@ -302,13 +310,22 @@
         "    model=model,\n",
         "    args=training_args,\n",
         "    reward_funcs=[echo_reward_function],\n",
-        "    train_dataset=dataset,\n",
         "    tokenizer=tokenizer,\n",
         ")\n",
         "\n",
-        "print(\"Starting GRPO training against live ECHO environment...\")\n",
         "trainer.train()\n",
-        "print(\"Training complete!\")"
       ],
       "execution_count": null,
       "outputs": [],

@@ -273,28 +273,36 @@
         "print(\"   BAD : all rewards exactly -0.5 → stop & report\")\n",
         "print(\"=\" * 50)"
       ],
       "execution_count": null,
+      "outputs": [],
+      "id": "081d73fd"
     },
     {
       "cell_type": "code",
       "metadata": {},
       "source": [
+        "# Configure GRPO training — OPTIMIZED for A10G small (~2.5 hrs, ~$3-4 cost)\n",
+        "# Hardware: A10G small ($1.05/hr) — 3x faster than T4 for 7B models\n",
+        "# max_completion_length=256: enough for reasoning, 2x faster than 512\n",
+        "\n",
+        "# Rebuild dataset for A10G run\n",
+        "dataset_a10g = build_training_dataset(300)\n",
+        "print(f\"Dataset: {len(dataset_a10g)} samples\")\n",
+        "\n",
         "training_args = GRPOConfig(\n",
         "    output_dir=\"echo_grpo_output\",\n",
+        "    num_train_epochs=1,\n",
         "    per_device_train_batch_size=1,\n",
+        "    gradient_accumulation_steps=8,    # effective batch = 8, keep for GRPO stability\n",
         "    learning_rate=2e-5,\n",
+        "    warmup_steps=20,\n",
+        "    logging_steps=5,\n",
+        "    save_steps=50,\n",
+        "    bf16=True,                        # A10G supports bfloat16 — better than fp16\n",
+        "    fp16=False,\n",
         "    report_to=\"none\",\n",
+        "    max_completion_length=256,        # 256 = enough reasoning space, 2x faster than 512\n",
+        "    num_generations=4,                # GRPO group size — do NOT reduce\n",
         "    temperature=0.8,\n",
         ")\n",
         "\n",
@@ -302,13 +310,22 @@
         "    model=model,\n",
         "    args=training_args,\n",
         "    reward_funcs=[echo_reward_function],\n",
+        "    train_dataset=dataset_a10g,\n",
         "    tokenizer=tokenizer,\n",
         ")\n",
         "\n",
+        "print(\"=\" * 55)\n",
+        "print(\"🚀  ECHO GRPO Training — A10G small + 256 tokens\")\n",
+        "print(\"    300 samples | 1 epoch | grad_accum=8\")\n",
+        "print(\"    Estimated: ~2.5 hrs | Cost: ~$3-4\")\n",
+        "print(\"=\" * 55)\n",
+        "print()\n",
+        "print(\"Watch step output — after step 5 you should see:\")\n",
+        "print(\"  GOOD: rewards mixed between -0.5 and +0.8\")\n",
+        "print(\"  BAD : all rewards exactly -0.5 → stop & report\")\n",
+        "print()\n",
         "trainer.train()\n",
+        "print(\"\\n✅ Training complete!\")"
       ],
       "execution_count": null,
       "outputs": [],