v3: patch_size=4 (64 tokens), 2 core layers, iters [2,3,4], ~16min total training
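The "64 tokens" in the title follows from the config change below: a 32×32 latent split into 4×4 patches gives (32/4)² = 64 patch tokens, versus the 256 tokens the cell comment attributes to the previous setup (which corresponds to 2×2 patches on the same grid). A minimal sketch of that arithmetic, under the assumption that the token count is derived this way; the helper below is illustrative only, and the notebook itself reads the value from config.num_patches:

    def num_patches(latent_spatial: int, patch_size: int) -> int:
        # Patch tokens for a square latent grid: (side / patch_size) squared.
        assert latent_spatial % patch_size == 0
        return (latent_spatial // patch_size) ** 2

    print(num_patches(32, 4))  # 64  -> the token count in this commit
    print(num_patches(32, 2))  # 256 -> the previous count noted in the cell comment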
IRIS_Training_Notebook.ipynb (changed)
@@ -282,6 +282,7 @@
  "metadata": {},
  "source": [
  "# Create IRIS-Tiny (best for free-tier)\n",
+ "# patch_size=4 reduces tokens from 256 to 64 \u2192 4\u00d7 faster training\n",
  "config = IRISConfig(\n",
  "    latent_channels=4,   # SD-VAE standard\n",
  "    latent_spatial=32,   # 256px / 8\n",
@@ -290,17 +291,17 @@
  "    head_dim=64,\n",
  "    ffn_ratio=2.667,\n",
  "    num_prelude_blocks=1,\n",
- "    num_core_layers=
+ "    num_core_layers=2,   # 2 layers (speed vs quality tradeoff for demo)\n",
  "    num_coda_blocks=1,\n",
- "    default_iterations=
+ "    default_iterations=4,\n",
  "    max_iterations=16,\n",
  "    fourier_num_blocks=6,\n",
  "    sparsity_threshold=0.01,\n",
  "    recurrence_dim=192,\n",
- "    manhattan_window=
+ "    manhattan_window=8,\n",
  "    text_dim=768,\n",
  "    max_text_tokens=77,\n",
- "    patch_size=
+ "    patch_size=4,   # 4\u00d7 larger patches \u2192 64 tokens instead of 256\n",
  ")\n",
  "\n",
  "iris = IRIS(config).to(device)\n",
@@ -309,7 +310,7 @@
  "\n",
  "print(f\"IRIS Generator: {gen_params:,} params ({gen_params*2/1024/1024:.1f} MB fp16)\")\n",
  "print(f\" Core (shared): {core_params:,} ({core_params/gen_params*100:.1f}%)\")\n",
- "print(f\"
+ "print(f\" Tokens: {config.num_patches} (from {config.latent_spatial}\u00d7{config.latent_spatial} latent, patch_size={config.patch_size})\")\n",
  "print(f\" Input: [B, 4, 32, 32] latent \u2192 Output: [B, 4, 32, 32] velocity\")"
  ],
  "outputs": [],
@@ -359,7 +360,7 @@
  "\n",
  "print(f\"Training for {EPOCHS} epochs ({total_steps} optimizer steps)\")\n",
  "print(f\"Batch: {BATCH_SIZE} \u00d7 {GRAD_ACCUM} accum = {BATCH_SIZE*GRAD_ACCUM} effective\")\n",
- "print(f\"Iterations per step: random from [
+ "print(f\"Iterations per step: random from [2, 3, 4]\")\n",
  "print()\n",
  "\n",
  "# \u2500\u2500\u2500 Training Loop \u2500\u2500\u2500\n",
@@ -379,7 +380,7 @@
  "    text_emb = text_emb.to(device, non_blocking=True)\n",
  "\n",
  "    with torch.amp.autocast('cuda', dtype=torch.float16):\n",
- "        r = [
+ "        r = [2, 3, 4][torch.randint(0, 3, (1,)).item()]\n",
  "        result = iris.train_step_latent(z_0, text_emb, num_iterations=r)\n",
  "        loss = result[\"loss\"] / GRAD_ACCUM\n",
  "\n",
@@ -431,7 +432,7 @@
  "\n",
  "iris.eval()\n",
  "fig, axes = plt.subplots(len(prompts), 4, figsize=(16, len(prompts)*4))\n",
- "iter_counts = [2,
+ "iter_counts = [2, 3, 4, 6]\n",
  "\n",
  "for row, prompt in enumerate(prompts):\n",
  "    text_emb = encode_text([prompt])\n",