Upload folder using huggingface_hub

Browse files

Files changed (6) hide show

__pycache__/predict.cpython-311.pyc +0 -0
model_pole_position.pt +1 -1
model_pong.pt +2 -2
model_sonic.pt +1 -1
predict.py +61 -82
train.log +152 -118

__pycache__/predict.cpython-311.pyc CHANGED Viewed

Binary files a/__pycache__/predict.cpython-311.pyc and b/__pycache__/predict.cpython-311.pyc differ

model_pole_position.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:27d26875071b536cc75cac27a0840b50cd6c9a8e1956c94f1cd08feacc49621f
 size 2971526

 version https://git-lfs.github.com/spec/v1
+oid sha256:8e0affcef8e533a29037751e27948a3eb0f2fda2792ce2b3dfc876cadb09e281
 size 2971526

model_pong.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d6c8b9235347bea94e7e5f5f0f225d4c1dbd13a749d5e28920c75c91902ecb11
-size 2435368

 version https://git-lfs.github.com/spec/v1
+oid sha256:ab8070ddcde00333d7b52c89a0da9a61eece1e67c46163cd011ce4cd3c422f0c
+size 2436712

model_sonic.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9035098568ea4789c5dda58d685af07b4b5a0cdf300848f79ed6d96ad901da34
 size 6182614

 version https://git-lfs.github.com/spec/v1
+oid sha256:e7e17327a6f03cb72a35bd3c48d481b4eebea5db6572ed2b3fa290b330bca304
 size 6182614

predict.py CHANGED Viewed

@@ -1,4 +1,4 @@
-"""Hybrid v4: AR for Pong, direct 8-frame for Sonic/PP with TTA."""
 import sys
 import os
 import numpy as np
@@ -11,6 +11,12 @@ CONTEXT_FRAMES = 8
 PRED_FRAMES = 8
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 def detect_game(context_frames: np.ndarray) -> str:
     first_8 = context_frames[:CONTEXT_FRAMES]
@@ -26,65 +32,56 @@ def detect_game(context_frames: np.ndarray) -> str:
         return "sonic"
-class HybridModels:
-    def __init__(self):
-        self.models = {}
-        self.direct_cache = None  # Cache for direct 8-frame predictions
         self.cache_step = 0
     def reset_cache(self):
-        self.direct_cache = None
         self.cache_step = 0
 def load_model(model_dir: str):
-    hybrid = HybridModels()
-    # Pong: AR model (3 outputs)
-    pong = UNet(in_channels=24, out_channels=3,
-                enc_channels=(32, 64, 128), bottleneck_channels=128,
-                upsample_mode="bilinear").to(DEVICE)
-    sd = torch.load(os.path.join(model_dir, "model_pong.pt"),
-                    map_location=DEVICE, weights_only=True)
-    pong.load_state_dict({k: v.float() for k, v in sd.items()})
-    pong.eval()
-    hybrid.models["pong"] = pong
-    # Sonic: direct 8-frame model (24 outputs)
-    sonic = UNet(in_channels=24, out_channels=24,
-                 enc_channels=(48, 96, 192), bottleneck_channels=256,
-                 upsample_mode="bilinear").to(DEVICE)
-    sd = torch.load(os.path.join(model_dir, "model_sonic.pt"),
-                    map_location=DEVICE, weights_only=True)
-    sonic.load_state_dict({k: v.float() for k, v in sd.items()})
-    sonic.eval()
-    hybrid.models["sonic"] = sonic
-    # PP: direct 8-frame model (24 outputs)
-    pp = UNet(in_channels=24, out_channels=24,
-              enc_channels=(32, 64, 128), bottleneck_channels=192,
-              upsample_mode="bilinear").to(DEVICE)
-    sd = torch.load(os.path.join(model_dir, "model_pole_position.pt"),
-                    map_location=DEVICE, weights_only=True)
-    pp.load_state_dict({k: v.float() for k, v in sd.items()})
-    pp.eval()
-    hybrid.models["pole_position"] = pp
-    return hybrid
 def _predict_8frames(model, context_tensor, last_tensor):
     output = model(context_tensor)  # (1, 24, 64, 64)
     residuals = output.reshape(1, PRED_FRAMES, 3, 64, 64)
     last_expanded = last_tensor.unsqueeze(1).expand_as(residuals)
-    return torch.clamp(last_expanded + residuals, 0, 1)
-def predict_next_frame(hybrid, context_frames: np.ndarray) -> np.ndarray:
-    game = detect_game(context_frames)
-    model = hybrid.models[game]
     n = len(context_frames)
     if n < CONTEXT_FRAMES:
         padding = np.stack([context_frames[0]] * (CONTEXT_FRAMES - n), axis=0)
         frames = np.concatenate([padding, context_frames], axis=0)
@@ -98,50 +95,32 @@ def predict_next_frame(hybrid, context_frames: np.ndarray) -> np.ndarray:
     last_frame = frames_norm[-1]
     last_frame_t = np.transpose(last_frame, (2, 0, 1))[np.newaxis]
-    if game == "pong":
-        # AR prediction for Pong (no TTA, no caching)
-        with torch.no_grad():
-            context_tensor = torch.from_numpy(context).to(DEVICE)
-            last_tensor = torch.from_numpy(last_frame_t).to(DEVICE)
-            residual = model(context_tensor)
-            predicted = torch.clamp(last_tensor + residual, 0, 1)
-        predicted_np = predicted[0].cpu().numpy()
-        predicted_np = np.transpose(predicted_np, (1, 2, 0))
-        predicted_np = (predicted_np * 255).clip(0, 255).astype(np.uint8)
-        return predicted_np
-    else:
-        # Direct 8-frame for Sonic and PP with caching
-        if hybrid.direct_cache is not None and n > CONTEXT_FRAMES and hybrid.cache_step < PRED_FRAMES:
-            result = hybrid.direct_cache[hybrid.cache_step]
-            hybrid.cache_step += 1
-            if hybrid.cache_step >= PRED_FRAMES:
-                hybrid.reset_cache()
-            return result
-        # New window: predict all 8 frames with TTA
-        hybrid.reset_cache()
-        with torch.no_grad():
-            context_tensor = torch.from_numpy(context).to(DEVICE)
-            last_tensor = torch.from_numpy(last_frame_t).to(DEVICE)
-            predicted_orig = _predict_8frames(model, context_tensor, last_tensor)
-            # TTA: horizontal flip
             context_flipped = torch.flip(context_tensor, dims=[3])
             last_flipped = torch.flip(last_tensor, dims=[3])
             predicted_flipped = _predict_8frames(model, context_flipped, last_flipped)
-            predicted_flipped = torch.flip(predicted_flipped, dims=[4])  # flip width in (1,8,3,H,W)
             predicted = (predicted_orig + predicted_flipped) / 2.0
-        predicted_np = predicted[0].cpu().numpy()  # (8, 3, 64, 64)
-        hybrid.direct_cache = []
-        for i in range(PRED_FRAMES):
-            frame = np.transpose(predicted_np[i], (1, 2, 0))
-            frame = (frame * 255).clip(0, 255).astype(np.uint8)
-            hybrid.direct_cache.append(frame)
-        result = hybrid.direct_cache[hybrid.cache_step]
-        hybrid.cache_step += 1
-        return result

+"""Direct 8-frame prediction for all games with TTA."""
 import sys
 import os
 import numpy as np
 PRED_FRAMES = 8
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+GAME_CONFIGS = {
+    "pong": {"enc_channels": (32, 64, 128), "bottleneck": 128},
+    "sonic": {"enc_channels": (48, 96, 192), "bottleneck": 256},
+    "pole_position": {"enc_channels": (32, 64, 128), "bottleneck": 192},
+}
 def detect_game(context_frames: np.ndarray) -> str:
     first_8 = context_frames[:CONTEXT_FRAMES]
         return "sonic"
+class ModelCache:
+    def __init__(self, models):
+        self.models = models
+        self.cached_predictions = None
         self.cache_step = 0
     def reset_cache(self):
+        self.cached_predictions = None
         self.cache_step = 0
 def load_model(model_dir: str):
+    models = {}
+    for game, cfg in GAME_CONFIGS.items():
+        model = UNet(in_channels=24, out_channels=24,
+                     enc_channels=cfg["enc_channels"],
+                     bottleneck_channels=cfg["bottleneck"],
+                     upsample_mode="bilinear").to(DEVICE)
+        state_dict = torch.load(os.path.join(model_dir, f"model_{game}.pt"),
+                                map_location=DEVICE, weights_only=True)
+        state_dict = {k: v.float() for k, v in state_dict.items()}
+        model.load_state_dict(state_dict)
+        model.eval()
+        models[game] = model
+    return ModelCache(models)
 def _predict_8frames(model, context_tensor, last_tensor):
     output = model(context_tensor)  # (1, 24, 64, 64)
     residuals = output.reshape(1, PRED_FRAMES, 3, 64, 64)
     last_expanded = last_tensor.unsqueeze(1).expand_as(residuals)
+    return torch.clamp(last_expanded + residuals, 0, 1)  # (1, 8, 3, 64, 64)
+def predict_next_frame(cache, context_frames: np.ndarray) -> np.ndarray:
     n = len(context_frames)
+    # If cache exists and context grew (AR rollout), return next cached frame
+    if cache.cached_predictions is not None and n > CONTEXT_FRAMES and cache.cache_step < PRED_FRAMES:
+        result = cache.cached_predictions[cache.cache_step]
+        cache.cache_step += 1
+        if cache.cache_step >= PRED_FRAMES:
+            cache.reset_cache()
+        return result
+    # New window: predict all 8 frames
+    cache.reset_cache()
+    game = detect_game(context_frames)
+    model = cache.models[game]
     if n < CONTEXT_FRAMES:
         padding = np.stack([context_frames[0]] * (CONTEXT_FRAMES - n), axis=0)
         frames = np.concatenate([padding, context_frames], axis=0)
     last_frame = frames_norm[-1]
     last_frame_t = np.transpose(last_frame, (2, 0, 1))[np.newaxis]
+    with torch.no_grad():
+        context_tensor = torch.from_numpy(context).to(DEVICE)
+        last_tensor = torch.from_numpy(last_frame_t).to(DEVICE)
+        predicted_orig = _predict_8frames(model, context_tensor, last_tensor)
+        if game == "pong":
+            # Pong: no TTA (asymmetric)
+            predicted = predicted_orig
+        else:
+            # TTA: horizontal flip (dim=3 is width for (B, T, C, H, W) reshaped from (B, 24, H, W))
+            # But we work on (1, 24, H, W) context - flip along dim 3 (width)
             context_flipped = torch.flip(context_tensor, dims=[3])
             last_flipped = torch.flip(last_tensor, dims=[3])
             predicted_flipped = _predict_8frames(model, context_flipped, last_flipped)
+            # Flip back: predicted_flipped is (1, 8, 3, H, W), flip width dim=4
+            predicted_flipped = torch.flip(predicted_flipped, dims=[4])
             predicted = (predicted_orig + predicted_flipped) / 2.0
+    predicted_np = predicted[0].cpu().numpy()  # (8, 3, 64, 64)
+    cache.cached_predictions = []
+    for i in range(PRED_FRAMES):
+        frame = np.transpose(predicted_np[i], (1, 2, 0))
+        frame = (frame * 255).clip(0, 255).astype(np.uint8)
+        cache.cached_predictions.append(frame)
+    result = cache.cached_predictions[cache.cache_step]
+    cache.cache_step += 1
+    return result

train.log CHANGED Viewed

@@ -1,118 +1,152 @@
-[2026-04-11 17:22:57] Starting direct 8-frame training for 2026-04-11-180000-direct-8frame
-[2026-04-11 17:22:57] Device: cuda
-[2026-04-11 17:22:57] === pong ===
-[2026-04-11 17:22:57]   pong: 1,199,224 params (2.3 MB fp16)
-[2026-04-11 17:22:58]   pong train: 8194 seqs (len=16)
-[2026-04-11 17:22:58]   pong val: 964 seqs (len=16)
-[2026-04-11 17:23:19]   pong E1/100 | T:0.261624(S:0.6295) V:0.246500(S:0.6510) LR:3.00e-04
-[2026-04-11 17:23:38]   pong E2/100 | T:0.222204(S:0.6854) V:0.215782(S:0.6944) LR:3.00e-04
-[2026-04-11 17:23:58]   pong E3/100 | T:0.189432(S:0.7318) V:0.196561(S:0.7216) LR:2.99e-04
-[2026-04-11 17:24:16]   pong E4/100 | T:0.165138(S:0.7663) V:0.181374(S:0.7430) LR:2.99e-04
-[2026-04-11 17:24:35]   pong E5/100 | T:0.146892(S:0.7921) V:0.168277(S:0.7617) LR:2.98e-04
-[2026-04-11 17:24:55]   pong E6/100 | T:0.133788(S:0.8107) V:0.160231(S:0.7730) LR:2.97e-04
-[2026-04-11 17:25:16]   pong E7/100 | T:0.122975(S:0.8260) V:0.151815(S:0.7850) LR:2.96e-04
-[2026-04-11 17:25:55]   pong E9/100 | T:0.108118(S:0.8471) V:0.145642(S:0.7936) LR:2.94e-04
-[2026-04-11 17:26:16]   pong E10/100 | T:0.102042(S:0.8557) V:0.145171(S:0.7944) LR:2.93e-04
-[2026-04-11 17:26:35]   pong E11/100 | T:0.097010(S:0.8628) V:0.137783(S:0.8048) LR:2.91e-04
-[2026-04-11 17:27:16]   pong E13/100 | T:0.088259(S:0.8752) V:0.133496(S:0.8109) LR:2.88e-04
-[2026-04-11 17:27:36]   pong E14/100 | T:0.085017(S:0.8798) V:0.128416(S:0.8181) LR:2.86e-04
-[2026-04-11 17:28:54]   pong E18/100 | T:0.073576(S:0.8960) V:0.125769(S:0.8218) LR:2.77e-04
-[2026-04-11 17:29:34]   pong E20/100 | T:0.069607(S:0.9016) V:0.125478(S:0.8222) LR:2.71e-04
-[2026-04-11 17:30:34]   pong E23/100 | T:0.064038(S:0.9095) V:0.123089(S:0.8256) LR:2.63e-04
-[2026-04-11 17:31:15]   pong E25/100 | T:0.061200(S:0.9135) V:0.121373(S:0.8280) LR:2.56e-04
-[2026-04-11 17:31:34]   pong E26/100 | T:0.060173(S:0.9150) V:0.120411(S:0.8294) LR:2.53e-04
-[2026-04-11 17:31:53]   pong E27/100 | T:0.058330(S:0.9176) V:0.119902(S:0.8300) LR:2.49e-04
-[2026-04-11 17:32:33]   pong E29/100 | T:0.056276(S:0.9205) V:0.118473(S:0.8321) LR:2.42e-04
-[2026-04-11 17:32:53]   pong E30/100 | T:0.054990(S:0.9223) V:0.117756(S:0.8331) LR:2.38e-04
-[2026-04-11 17:33:56]   pong E33/100 | T:0.051766(S:0.9269) V:0.116454(S:0.8349) LR:2.27e-04
-[2026-04-11 17:34:37]   pong E35/100 | T:0.050350(S:0.9289) V:0.116344(S:0.8351) LR:2.18e-04
-[2026-04-11 17:34:58]   pong E36/100 | T:0.048892(S:0.9309) V:0.115702(S:0.8360) LR:2.14e-04
-[2026-04-11 17:35:41]   pong E38/100 | T:0.047967(S:0.9322) V:0.115106(S:0.8368) LR:2.06e-04
-[2026-04-11 17:36:23]   pong E40/100 | T:0.045971(S:0.9351) V:0.113090(S:0.8397) LR:1.97e-04
-[2026-04-11 17:36:42]   pong E41/100 | T:0.045044(S:0.9364) V:0.112823(S:0.8400) LR:1.92e-04
-[2026-04-11 17:37:03]   pong E42/100 | T:0.044532(S:0.9371) V:0.112635(S:0.8403) LR:1.88e-04
-[2026-04-11 17:38:03]   pong E45/100 | T:0.042640(S:0.9398) V:0.111960(S:0.8413) LR:1.74e-04
-[2026-04-11 17:38:42]   pong E47/100 | T:0.041282(S:0.9417) V:0.111201(S:0.8423) LR:1.65e-04
-[2026-04-11 17:39:02]   pong E48/100 | T:0.040975(S:0.9421) V:0.111088(S:0.8425) LR:1.60e-04
-[2026-04-11 17:39:42]   pong E50/100 | T:0.039824(S:0.9438) V:0.110872(S:0.8428) LR:1.50e-04
-[2026-04-11 17:40:22]   pong E52/100 | T:0.038633(S:0.9455) V:0.109695(S:0.8445) LR:1.41e-04
-[2026-04-11 17:41:02]   pong E54/100 | T:0.037921(S:0.9465) V:0.109453(S:0.8448) LR:1.32e-04
-[2026-04-11 17:41:56]   pong E57/100 | T:0.036545(S:0.9484) V:0.109087(S:0.8453) LR:1.18e-04
-[2026-04-11 17:42:17]   pong E58/100 | T:0.035855(S:0.9494) V:0.109036(S:0.8454) LR:1.13e-04
-[2026-04-11 17:42:36]   pong E59/100 | T:0.035576(S:0.9498) V:0.108473(S:0.8462) LR:1.09e-04
-[2026-04-11 17:42:56]   pong E60/100 | T:0.035289(S:0.9502) V:0.108379(S:0.8463) LR:1.04e-04
-[2026-04-11 17:43:35]   pong E62/100 | T:0.034419(S:0.9514) V:0.108306(S:0.8464) LR:9.55e-05
-[2026-04-11 17:43:55]   pong E63/100 | T:0.034142(S:0.9518) V:0.107726(S:0.8472) LR:9.11e-05
-[2026-04-11 17:44:33]   pong E65/100 | T:0.033535(S:0.9527) V:0.107617(S:0.8474) LR:8.26e-05
-[2026-04-11 17:45:48]   pong E69/100 | T:0.032286(S:0.9544) V:0.107517(S:0.8475) LR:6.65e-05
-[2026-04-11 17:46:06]   pong E70/100 | T:0.031978(S:0.9549) V:0.107445(S:0.8476) LR:6.26e-05
-[2026-04-11 17:46:27]   pong E71/100 | T:0.031772(S:0.9552) V:0.107240(S:0.8479) LR:5.89e-05
-[2026-04-11 17:46:46]   pong E72/100 | T:0.031589(S:0.9554) V:0.106995(S:0.8483) LR:5.52e-05
-[2026-04-11 17:47:06]   pong E73/100 | T:0.031315(S:0.9558) V:0.106564(S:0.8489) LR:5.16e-05
-[2026-04-11 17:49:21]   pong E80/100 | T:0.030090(S:0.9576) V:0.106367(S:0.8492) LR:2.96e-05
-[2026-04-11 17:50:40]   pong E84/100 | T:0.029537(S:0.9583) V:0.106164(S:0.8494) LR:1.95e-05
-[2026-04-11 17:51:00]   pong E85/100 | T:0.029495(S:0.9584) V:0.106157(S:0.8494) LR:1.73e-05
-[2026-04-11 17:51:40]   pong E87/100 | T:0.029253(S:0.9587) V:0.106032(S:0.8496) LR:1.33e-05
-[2026-04-11 17:52:00]   pong E88/100 | T:0.029200(S:0.9588) V:0.105951(S:0.8497) LR:1.15e-05
-[2026-04-11 17:52:39]   pong E90/100 | T:0.029051(S:0.9590) V:0.106003(S:0.8497) LR:8.32e-06
-[2026-04-11 17:53:00]   pong E91/100 | T:0.028990(S:0.9591) V:0.105951(S:0.8497) LR:6.94e-06
-[2026-04-11 17:55:39]   pong E99/100 | T:0.028765(S:0.9594) V:0.105859(S:0.8499) LR:1.07e-06
-[2026-04-11 17:55:59]   pong E100/100 | T:0.028750(S:0.9595) V:0.106039(S:0.8496) LR:1.00e-06
-[2026-04-11 17:55:59]   pong done. Best: 0.105859
-[2026-04-11 17:55:59] === pole_position ===
-[2026-04-11 17:55:59]   pole_position: 1,465,848 params (2.8 MB fp16)
-[2026-04-11 17:56:00]   pole_position train: 4097 seqs (len=16)
-[2026-04-11 17:56:00]   pole_position val: 482 seqs (len=16)
-[2026-04-11 17:56:10]   pole_position E1/100 | T:0.075590(S:0.9035) V:0.057663(S:0.9277) LR:3.00e-04
-[2026-04-11 17:56:20]   pole_position E2/100 | T:0.064522(S:0.9184) V:0.053095(S:0.9336) LR:3.00e-04
-[2026-04-11 17:56:30]   pole_position E3/100 | T:0.060081(S:0.9243) V:0.051676(S:0.9352) LR:2.99e-04
-[2026-04-11 17:56:40]   pole_position E4/100 | T:0.057492(S:0.9276) V:0.049574(S:0.9381) LR:2.99e-04
-[2026-04-11 17:56:50]   pole_position E5/100 | T:0.055706(S:0.9299) V:0.049319(S:0.9386) LR:2.98e-04
-[2026-04-11 17:57:01]   pole_position E6/100 | T:0.053870(S:0.9323) V:0.046328(S:0.9425) LR:2.97e-04
-[2026-04-11 17:57:21]   pole_position E8/100 | T:0.050747(S:0.9364) V:0.045251(S:0.9438) LR:2.95e-04
-[2026-04-11 17:57:31]   pole_position E9/100 | T:0.049606(S:0.9379) V:0.044027(S:0.9455) LR:2.94e-04
-[2026-04-11 17:57:41]   pole_position E10/100 | T:0.048376(S:0.9395) V:0.048385(S:0.9398) LR:2.93e-04
-[2026-04-11 17:58:02]   pole_position E12/100 | T:0.046456(S:0.9420) V:0.043792(S:0.9456) LR:2.90e-04
-[2026-04-11 17:58:23]   pole_position E14/100 | T:0.044549(S:0.9445) V:0.042208(S:0.9474) LR:2.86e-04
-[2026-04-11 17:58:53]   pole_position E17/100 | T:0.042305(S:0.9475) V:0.041426(S:0.9485) LR:2.79e-04
-[2026-04-11 17:59:24]   pole_position E20/100 | T:0.040529(S:0.9498) V:0.042968(S:0.9464) LR:2.71e-04
-[2026-04-11 18:01:03]   pole_position E30/100 | T:0.036013(S:0.9557) V:0.042198(S:0.9472) LR:2.38e-04
-[2026-04-11 18:02:43]   pole_position E40/100 | T:0.033191(S:0.9594) V:0.042161(S:0.9472) LR:1.97e-04
-[2026-04-11 18:04:25]   pole_position E50/100 | T:0.030915(S:0.9624) V:0.042456(S:0.9467) LR:1.50e-04
-[2026-04-11 18:06:07]   pole_position E60/100 | T:0.029315(S:0.9645) V:0.042134(S:0.9472) LR:1.04e-04
-[2026-04-11 18:07:47]   pole_position E70/100 | T:0.028091(S:0.9661) V:0.042533(S:0.9466) LR:6.26e-05
-[2026-04-11 18:09:29]   pole_position E80/100 | T:0.027237(S:0.9672) V:0.042501(S:0.9466) LR:2.96e-05
-[2026-04-11 18:11:08]   pole_position E90/100 | T:0.026795(S:0.9678) V:0.042639(S:0.9464) LR:8.32e-06
-[2026-04-11 18:12:49]   pole_position E100/100 | T:0.026658(S:0.9680) V:0.042839(S:0.9462) LR:1.00e-06
-[2026-04-11 18:12:49]   pole_position done. Best: 0.041426
-[2026-04-11 18:12:49] === sonic ===
-[2026-04-11 18:12:49]   sonic: 3,071,016 params (5.9 MB fp16)
-[2026-04-11 18:12:54]   sonic train: 30848 seqs (len=16)
-[2026-04-11 18:12:54]   sonic val: 3856 seqs (len=16)
-[2026-04-11 18:14:11]   sonic E1/100 | T:0.175591(S:0.7765) V:0.157856(S:0.8006) LR:3.00e-04
-[2026-04-11 18:15:26]   sonic E2/100 | T:0.159479(S:0.7978) V:0.149752(S:0.8110) LR:3.00e-04
-[2026-04-11 18:16:41]   sonic E3/100 | T:0.153539(S:0.8056) V:0.147334(S:0.8155) LR:2.99e-04
-[2026-04-11 18:17:56]   sonic E4/100 | T:0.149574(S:0.8108) V:0.144793(S:0.8177) LR:2.99e-04
-[2026-04-11 18:19:11]   sonic E5/100 | T:0.145852(S:0.8157) V:0.143262(S:0.8198) LR:2.98e-04
-[2026-04-11 18:20:26]   sonic E6/100 | T:0.142932(S:0.8196) V:0.142910(S:0.8208) LR:2.97e-04
-[2026-04-11 18:22:55]   sonic E8/100 | T:0.137437(S:0.8268) V:0.140100(S:0.8241) LR:2.95e-04
-[2026-04-11 18:25:24]   sonic E10/100 | T:0.132863(S:0.8329) V:0.137565(S:0.8273) LR:2.93e-04
-[2026-04-11 18:31:35]   sonic E15/100 | T:0.123970(S:0.8445) V:0.137460(S:0.8275) LR:2.84e-04
-[2026-04-11 18:35:22]   sonic E18/100 | T:0.119989(S:0.8498) V:0.137060(S:0.8281) LR:2.77e-04
-[2026-04-11 18:36:34]   sonic E19/100 | T:0.118824(S:0.8513) V:0.137037(S:0.8283) LR:2.75e-04
-[2026-04-11 18:37:49]   sonic E20/100 | T:0.117435(S:0.8531) V:0.137332(S:0.8278) LR:2.72e-04
-[2026-04-11 18:39:04]   sonic E21/100 | T:0.116390(S:0.8545) V:0.135734(S:0.8299) LR:2.70e-04
-[2026-04-11 18:50:11]   sonic E30/100 | T:0.108528(S:0.8649) V:0.139606(S:0.8249) LR:2.40e-04
-[2026-04-11 19:02:35]   sonic E40/100 | T:0.102536(S:0.8727) V:0.138633(S:0.8262) LR:2.00e-04
-[2026-04-11 19:14:57]   sonic E50/100 | T:0.098295(S:0.8783) V:0.139113(S:0.8252) LR:1.55e-04
-[2026-04-11 19:27:10]   sonic E60/100 | T:0.095048(S:0.8826) V:0.138777(S:0.8255) LR:1.10e-04
-[2026-04-11 19:39:39]   sonic E70/100 | T:0.092648(S:0.8857) V:0.141122(S:0.8224) LR:6.98e-05
-[2026-04-11 19:52:02]   sonic E80/100 | T:0.091056(S:0.8878) V:0.140701(S:0.8231) LR:3.77e-05
-[2026-04-11 20:04:24]   sonic E90/100 | T:0.090060(S:0.8891) V:0.141902(S:0.8214) LR:1.71e-05
-[2026-04-11 20:16:49]   sonic E100/100 | T:0.089632(S:0.8897) V:0.141938(S:0.8214) LR:1.00e-05
-[2026-04-11 20:16:49]   sonic done. Best: 0.135734
-[2026-04-11 20:16:49]   pong: 2.3 MB
-[2026-04-11 20:16:49]   pole_position: 2.8 MB
-[2026-04-11 20:16:49]   sonic: 5.9 MB
-[2026-04-11 20:16:49]   Total: 11.1 MB
-[2026-04-11 20:16:49] Training complete!

+[2026-04-11 20:37:53] Starting improved direct 8-frame training for 2026-04-11-210000-direct-improved
+[2026-04-11 20:37:53] Device: cuda
+[2026-04-11 20:37:53] === pong ===
+[2026-04-11 20:37:53]   pong: 1,199,224 params (2.3 MB fp16)
+[2026-04-11 20:37:54]   pong train: 8194 seqs (len=16)
+[2026-04-11 20:37:54]   pong val: 964 seqs (len=16)
+[2026-04-11 20:38:15]   pong E1/150 | T:0.245151(S:0.6311) V:0.225469(S:0.6589) LR:2.00e-04
+[2026-04-11 20:38:35]   pong E2/150 | T:0.207902(S:0.6846) V:0.207636(S:0.6836) LR:2.00e-04
+[2026-04-11 20:38:55]   pong E3/150 | T:0.183334(S:0.7217) V:0.184517(S:0.7193) LR:2.00e-04
+[2026-04-11 20:39:15]   pong E4/150 | T:0.162890(S:0.7525) V:0.170139(S:0.7405) LR:2.00e-04
+[2026-04-11 20:39:35]   pong E5/150 | T:0.148245(S:0.7747) V:0.168455(S:0.7455) LR:1.99e-04
+[2026-04-11 20:39:54]   pong E6/150 | T:0.134784(S:0.7951) V:0.155488(S:0.7639) LR:1.99e-04
+[2026-04-11 20:40:15]   pong E7/150 | T:0.125337(S:0.8093) V:0.153139(S:0.7670) LR:1.99e-04
+[2026-04-11 20:40:35]   pong E8/150 | T:0.117548(S:0.8213) V:0.140387(S:0.7865) LR:1.99e-04
+[2026-04-11 20:40:55]   pong E9/150 | T:0.110727(S:0.8316) V:0.139567(S:0.7879) LR:1.98e-04
+[2026-04-11 20:41:15]   pong E10/150 | T:0.105040(S:0.8404) V:0.135133(S:0.7943) LR:1.98e-04
+[2026-04-11 20:41:55]   pong E12/150 | T:0.096706(S:0.8530) V:0.133117(S:0.7976) LR:1.97e-04
+[2026-04-11 20:42:15]   pong E13/150 | T:0.093069(S:0.8586) V:0.128657(S:0.8049) LR:1.96e-04
+[2026-04-11 20:42:35]   pong E14/150 | T:0.089584(S:0.8641) V:0.127223(S:0.8071) LR:1.96e-04
+[2026-04-11 20:42:55]   pong E15/150 | T:0.086566(S:0.8686) V:0.123897(S:0.8117) LR:1.95e-04
+[2026-04-11 20:43:35]   pong E17/150 | T:0.081673(S:0.8762) V:0.123559(S:0.8119) LR:1.94e-04
+[2026-04-11 20:44:15]   pong E19/150 | T:0.076816(S:0.8836) V:0.120578(S:0.8168) LR:1.92e-04
+[2026-04-11 20:44:35]   pong E20/150 | T:0.075138(S:0.8862) V:0.123831(S:0.8129) LR:1.91e-04
+[2026-04-11 20:44:54]   pong E21/150 | T:0.072953(S:0.8897) V:0.120207(S:0.8171) LR:1.91e-04
+[2026-04-11 20:45:14]   pong E22/150 | T:0.071339(S:0.8922) V:0.118498(S:0.8202) LR:1.90e-04
+[2026-04-11 20:45:35]   pong E23/150 | T:0.069781(S:0.8947) V:0.117458(S:0.8223) LR:1.89e-04
+[2026-04-11 20:46:12]   pong E25/150 | T:0.067305(S:0.8984) V:0.116407(S:0.8233) LR:1.87e-04
+[2026-04-11 20:46:32]   pong E26/150 | T:0.065223(S:0.9018) V:0.115602(S:0.8242) LR:1.86e-04
+[2026-04-11 20:47:12]   pong E28/150 | T:0.062886(S:0.9054) V:0.113464(S:0.8277) LR:1.83e-04
+[2026-04-11 20:47:32]   pong E29/150 | T:0.061637(S:0.9073) V:0.112811(S:0.8285) LR:1.82e-04
+[2026-04-11 20:47:51]   pong E30/150 | T:0.060839(S:0.9086) V:0.112722(S:0.8292) LR:1.81e-04
+[2026-04-11 20:48:12]   pong E31/150 | T:0.059712(S:0.9104) V:0.111062(S:0.8316) LR:1.80e-04
+[2026-04-11 20:49:12]   pong E34/150 | T:0.056734(S:0.9149) V:0.111023(S:0.8316) LR:1.76e-04
+[2026-04-11 20:49:32]   pong E35/150 | T:0.055712(S:0.9165) V:0.110123(S:0.8330) LR:1.74e-04
+[2026-04-11 20:50:10]   pong E37/150 | T:0.054400(S:0.9186) V:0.109606(S:0.8340) LR:1.72e-04
+[2026-04-11 20:51:10]   pong E40/150 | T:0.052566(S:0.9215) V:0.109744(S:0.8340) LR:1.67e-04
+[2026-04-11 20:51:30]   pong E41/150 | T:0.051662(S:0.9229) V:0.108173(S:0.8359) LR:1.66e-04
+[2026-04-11 20:52:10]   pong E43/150 | T:0.050829(S:0.9242) V:0.107573(S:0.8367) LR:1.62e-04
+[2026-04-11 20:52:30]   pong E44/150 | T:0.049584(S:0.9261) V:0.107344(S:0.8375) LR:1.61e-04
+[2026-04-11 20:53:30]   pong E47/150 | T:0.047870(S:0.9288) V:0.107087(S:0.8377) LR:1.56e-04
+[2026-04-11 20:53:49]   pong E48/150 | T:0.047106(S:0.9299) V:0.105825(S:0.8395) LR:1.54e-04
+[2026-04-11 20:54:29]   pong E50/150 | T:0.046352(S:0.9312) V:0.108601(S:0.8357) LR:1.50e-04
+[2026-04-11 20:55:09]   pong E52/150 | T:0.045529(S:0.9325) V:0.105138(S:0.8406) LR:1.47e-04
+[2026-04-11 20:56:08]   pong E55/150 | T:0.043897(S:0.9350) V:0.104684(S:0.8416) LR:1.41e-04
+[2026-04-11 20:56:27]   pong E56/150 | T:0.043453(S:0.9357) V:0.104668(S:0.8417) LR:1.39e-04
+[2026-04-11 20:57:45]   pong E60/150 | T:0.041694(S:0.9384) V:0.103771(S:0.8432) LR:1.31e-04
+[2026-04-11 20:58:23]   pong E62/150 | T:0.040995(S:0.9395) V:0.103378(S:0.8433) LR:1.27e-04
+[2026-04-11 20:59:03]   pong E64/150 | T:0.040430(S:0.9403) V:0.102376(S:0.8449) LR:1.23e-04
+[2026-04-11 21:01:00]   pong E70/150 | T:0.038175(S:0.9439) V:0.101971(S:0.8454) LR:1.11e-04
+[2026-04-11 21:01:20]   pong E71/150 | T:0.038048(S:0.9441) V:0.101782(S:0.8458) LR:1.09e-04
+[2026-04-11 21:02:01]   pong E73/150 | T:0.037274(S:0.9453) V:0.101483(S:0.8464) LR:1.05e-04
+[2026-04-11 21:02:21]   pong E74/150 | T:0.037082(S:0.9456) V:0.101132(S:0.8470) LR:1.03e-04
+[2026-04-11 21:03:35]   pong E78/150 | T:0.036125(S:0.9471) V:0.100967(S:0.8472) LR:9.43e-05
+[2026-04-11 21:04:14]   pong E80/150 | T:0.035617(S:0.9479) V:0.101149(S:0.8473) LR:9.01e-05
+[2026-04-11 21:04:34]   pong E81/150 | T:0.035287(S:0.9484) V:0.100887(S:0.8473) LR:8.80e-05
+[2026-04-11 21:05:14]   pong E83/150 | T:0.034890(S:0.9490) V:0.100673(S:0.8476) LR:8.39e-05
+[2026-04-11 21:05:34]   pong E84/150 | T:0.034546(S:0.9495) V:0.100503(S:0.8481) LR:8.19e-05
+[2026-04-11 21:05:53]   pong E85/150 | T:0.034332(S:0.9499) V:0.100085(S:0.8486) LR:7.98e-05
+[2026-04-11 21:06:51]   pong E88/150 | T:0.033905(S:0.9505) V:0.099703(S:0.8491) LR:7.37e-05
+[2026-04-11 21:07:32]   pong E90/150 | T:0.033352(S:0.9514) V:0.100154(S:0.8485) LR:6.98e-05
+[2026-04-11 21:07:49]   pong E91/150 | T:0.033275(S:0.9515) V:0.099463(S:0.8496) LR:6.78e-05
+[2026-04-11 21:08:28]   pong E93/150 | T:0.032808(S:0.9522) V:0.099426(S:0.8497) LR:6.39e-05
+[2026-04-11 21:09:07]   pong E95/150 | T:0.032401(S:0.9529) V:0.099274(S:0.8498) LR:6.00e-05
+[2026-04-11 21:10:47]   pong E100/150 | T:0.031693(S:0.9539) V:0.099102(S:0.8502) LR:5.07e-05
+[2026-04-11 21:11:07]   pong E101/150 | T:0.031556(S:0.9542) V:0.098989(S:0.8504) LR:4.90e-05
+[2026-04-11 21:12:05]   pong E104/150 | T:0.031188(S:0.9547) V:0.098818(S:0.8506) LR:4.37e-05
+[2026-04-11 21:12:26]   pong E105/150 | T:0.031013(S:0.9550) V:0.098630(S:0.8508) LR:4.20e-05
+[2026-04-11 21:12:45]   pong E106/150 | T:0.030918(S:0.9552) V:0.098413(S:0.8511) LR:4.03e-05
+[2026-04-11 21:14:05]   pong E110/150 | T:0.030445(S:0.9559) V:0.099033(S:0.8506) LR:3.39e-05
+[2026-04-11 21:15:25]   pong E114/150 | T:0.030015(S:0.9566) V:0.098366(S:0.8513) LR:2.80e-05
+[2026-04-11 21:16:25]   pong E117/150 | T:0.029792(S:0.9569) V:0.098095(S:0.8517) LR:2.38e-05
+[2026-04-11 21:17:24]   pong E120/150 | T:0.029565(S:0.9573) V:0.098241(S:0.8515) LR:2.00e-05
+[2026-04-11 21:19:24]   pong E126/150 | T:0.029172(S:0.9579) V:0.098066(S:0.8518) LR:1.33e-05
+[2026-04-11 21:20:03]   pong E128/150 | T:0.029068(S:0.9580) V:0.098040(S:0.8518) LR:1.14e-05
+[2026-04-11 21:20:43]   pong E130/150 | T:0.029001(S:0.9581) V:0.098031(S:0.8518) LR:9.60e-06
+[2026-04-11 21:21:03]   pong E131/150 | T:0.028939(S:0.9582) V:0.097892(S:0.8521) LR:8.77e-06
+[2026-04-11 21:24:01]   pong E140/150 | T:0.028630(S:0.9587) V:0.097958(S:0.8520) LR:3.17e-06
+[2026-04-11 21:25:19]   pong E144/150 | T:0.028567(S:0.9588) V:0.097818(S:0.8522) LR:1.78e-06
+[2026-04-11 21:27:18]   pong E150/150 | T:0.028536(S:0.9589) V:0.097904(S:0.8521) LR:1.00e-06
+[2026-04-11 21:27:18]   pong done. Best: 0.097818
+[2026-04-11 21:27:18] === pole_position ===
+[2026-04-11 21:27:18]   pole_position: 1,465,848 params (2.8 MB fp16)
+[2026-04-11 21:27:19]   pole_position train: 4097 seqs (len=16)
+[2026-04-11 21:27:19]   pole_position val: 482 seqs (len=16)
+[2026-04-11 21:27:29]   pole_position E1/150 | T:0.072483(S:0.9013) V:0.055104(S:0.9272) LR:2.00e-04
+[2026-04-11 21:27:39]   pole_position E2/150 | T:0.060683(S:0.9178) V:0.049558(S:0.9337) LR:2.00e-04
+[2026-04-11 21:27:49]   pole_position E3/150 | T:0.056482(S:0.9234) V:0.049681(S:0.9330) LR:2.00e-04
+[2026-04-11 21:28:00]   pole_position E4/150 | T:0.053962(S:0.9268) V:0.047040(S:0.9371) LR:2.00e-04
+[2026-04-11 21:28:09]   pole_position E5/150 | T:0.051783(S:0.9298) V:0.043891(S:0.9410) LR:1.99e-04
+[2026-04-11 21:28:39]   pole_position E8/150 | T:0.047291(S:0.9360) V:0.042535(S:0.9431) LR:1.99e-04
+[2026-04-11 21:28:48]   pole_position E9/150 | T:0.046385(S:0.9372) V:0.041829(S:0.9440) LR:1.98e-04
+[2026-04-11 21:28:58]   pole_position E10/150 | T:0.045514(S:0.9385) V:0.042079(S:0.9438) LR:1.98e-04
+[2026-04-11 21:29:09]   pole_position E11/150 | T:0.044486(S:0.9399) V:0.040944(S:0.9449) LR:1.97e-04
+[2026-04-11 21:29:47]   pole_position E15/150 | T:0.041111(S:0.9447) V:0.039725(S:0.9465) LR:1.95e-04
+[2026-04-11 21:30:37]   pole_position E20/150 | T:0.038348(S:0.9487) V:0.038742(S:0.9477) LR:1.91e-04
+[2026-04-11 21:30:46]   pole_position E21/150 | T:0.037787(S:0.9495) V:0.038655(S:0.9481) LR:1.91e-04
+[2026-04-11 21:31:18]   pole_position E24/150 | T:0.036649(S:0.9511) V:0.038322(S:0.9481) LR:1.88e-04
+[2026-04-11 21:32:18]   pole_position E30/150 | T:0.034550(S:0.9541) V:0.038329(S:0.9484) LR:1.81e-04
+[2026-04-11 21:33:18]   pole_position E36/150 | T:0.033149(S:0.9561) V:0.038183(S:0.9480) LR:1.73e-04
+[2026-04-11 21:33:59]   pole_position E40/150 | T:0.032073(S:0.9577) V:0.038579(S:0.9475) LR:1.67e-04
+[2026-04-11 21:34:09]   pole_position E41/150 | T:0.031761(S:0.9581) V:0.037873(S:0.9485) LR:1.66e-04
+[2026-04-11 21:34:19]   pole_position E42/150 | T:0.031713(S:0.9582) V:0.036978(S:0.9498) LR:1.64e-04
+[2026-04-11 21:35:39]   pole_position E50/150 | T:0.030141(S:0.9604) V:0.038688(S:0.9470) LR:1.50e-04
+[2026-04-11 21:37:19]   pole_position E60/150 | T:0.028820(S:0.9623) V:0.038451(S:0.9473) LR:1.31e-04
+[2026-04-11 21:39:00]   pole_position E70/150 | T:0.027896(S:0.9636) V:0.038069(S:0.9478) LR:1.11e-04
+[2026-04-11 21:40:41]   pole_position E80/150 | T:0.026692(S:0.9653) V:0.038111(S:0.9476) LR:9.01e-05
+[2026-04-11 21:42:22]   pole_position E90/150 | T:0.025885(S:0.9665) V:0.037698(S:0.9482) LR:6.98e-05
+[2026-04-11 21:44:01]   pole_position E100/150 | T:0.025252(S:0.9674) V:0.038106(S:0.9475) LR:5.07e-05
+[2026-04-11 21:45:42]   pole_position E110/150 | T:0.024739(S:0.9681) V:0.037902(S:0.9479) LR:3.39e-05
+[2026-04-11 21:47:24]   pole_position E120/150 | T:0.024433(S:0.9685) V:0.037644(S:0.9482) LR:2.00e-05
+[2026-04-11 21:49:05]   pole_position E130/150 | T:0.024134(S:0.9690) V:0.037839(S:0.9479) LR:9.60e-06
+[2026-04-11 21:50:45]   pole_position E140/150 | T:0.024041(S:0.9691) V:0.037827(S:0.9479) LR:3.17e-06
+[2026-04-11 21:52:25]   pole_position E150/150 | T:0.023978(S:0.9692) V:0.037890(S:0.9478) LR:1.00e-06
+[2026-04-11 21:52:25]   pole_position done. Best: 0.036978
+[2026-04-11 21:52:25] === sonic ===
+[2026-04-11 21:52:25]   sonic: 3,071,016 params (5.9 MB fp16)
+[2026-04-11 21:52:29]   sonic train: 30848 seqs (len=16)
+[2026-04-11 21:52:30]   sonic val: 3856 seqs (len=16)
+[2026-04-11 21:53:45]   sonic E1/150 | T:0.162348(S:0.7775) V:0.144778(S:0.8026) LR:2.00e-04
+[2026-04-11 21:54:58]   sonic E2/150 | T:0.146694(S:0.7991) V:0.136146(S:0.8135) LR:2.00e-04
+[2026-04-11 21:56:13]   sonic E3/150 | T:0.140699(S:0.8074) V:0.134994(S:0.8151) LR:2.00e-04
+[2026-04-11 21:57:28]   sonic E4/150 | T:0.136883(S:0.8127) V:0.131966(S:0.8195) LR:2.00e-04
+[2026-04-11 21:58:43]   sonic E5/150 | T:0.133509(S:0.8175) V:0.129399(S:0.8230) LR:1.99e-04
+[2026-04-11 21:59:57]   sonic E6/150 | T:0.130435(S:0.8218) V:0.128735(S:0.8233) LR:1.99e-04
+[2026-04-11 22:01:14]   sonic E7/150 | T:0.127966(S:0.8254) V:0.126909(S:0.8259) LR:1.99e-04
+[2026-04-11 22:04:57]   sonic E10/150 | T:0.121944(S:0.8340) V:0.124331(S:0.8295) LR:1.98e-04
+[2026-04-11 22:06:11]   sonic E11/150 | T:0.120293(S:0.8364) V:0.123743(S:0.8304) LR:1.97e-04
+[2026-04-11 22:08:39]   sonic E13/150 | T:0.117218(S:0.8409) V:0.122889(S:0.8311) LR:1.97e-04
+[2026-04-11 22:14:48]   sonic E18/150 | T:0.111090(S:0.8498) V:0.122588(S:0.8317) LR:1.93e-04
+[2026-04-11 22:17:15]   sonic E20/150 | T:0.109268(S:0.8525) V:0.125544(S:0.8273) LR:1.92e-04
+[2026-04-11 22:20:54]   sonic E23/150 | T:0.106768(S:0.8561) V:0.121705(S:0.8325) LR:1.89e-04
+[2026-04-11 22:23:25]   sonic E25/150 | T:0.105207(S:0.8584) V:0.121577(S:0.8323) LR:1.87e-04
+[2026-04-11 22:27:00]   sonic E28/150 | T:0.103225(S:0.8613) V:0.120523(S:0.8339) LR:1.84e-04
+[2026-04-11 22:29:26]   sonic E30/150 | T:0.102327(S:0.8626) V:0.123279(S:0.8299) LR:1.82e-04
+[2026-04-11 22:41:41]   sonic E40/150 | T:0.097330(S:0.8698) V:0.123097(S:0.8299) LR:1.69e-04
+[2026-04-11 22:53:59]   sonic E50/150 | T:0.093990(S:0.8746) V:0.124820(S:0.8272) LR:1.53e-04
+[2026-04-11 23:06:25]   sonic E60/150 | T:0.091507(S:0.8782) V:0.124273(S:0.8274) LR:1.34e-04
+[2026-04-11 23:18:48]   sonic E70/150 | T:0.089327(S:0.8813) V:0.123787(S:0.8284) LR:1.15e-04
+[2026-04-11 23:31:12]   sonic E80/150 | T:0.087458(S:0.8839) V:0.122982(S:0.8295) LR:9.51e-05
+[2026-04-11 23:44:30]   sonic E90/150 | T:0.085960(S:0.8861) V:0.123807(S:0.8280) LR:7.56e-05
+[2026-04-11 23:57:54]   sonic E100/150 | T:0.084800(S:0.8877) V:0.124093(S:0.8276) LR:5.75e-05
+[2026-04-12 00:11:21]   sonic E110/150 | T:0.083786(S:0.8892) V:0.124339(S:0.8272) LR:4.14e-05
+[2026-04-12 00:25:17]   sonic E120/150 | T:0.083098(S:0.8902) V:0.125028(S:0.8261) LR:2.81e-05
+[2026-04-12 00:39:13]   sonic E130/150 | T:0.082543(S:0.8910) V:0.124983(S:0.8261) LR:1.82e-05
+[2026-04-12 00:53:09]   sonic E140/150 | T:0.082184(S:0.8915) V:0.125446(S:0.8255) LR:1.21e-05
+[2026-04-12 01:06:48]   sonic E150/150 | T:0.082048(S:0.8917) V:0.124580(S:0.8267) LR:1.00e-05
+[2026-04-12 01:06:48]   sonic done. Best: 0.120523
+[2026-04-12 01:06:48]   pong: 2.3 MB
+[2026-04-12 01:06:48]   pole_position: 2.8 MB
+[2026-04-12 01:06:48]   sonic: 5.9 MB
+[2026-04-12 01:06:48]   Total: 11.1 MB
+[2026-04-12 01:06:48] Training complete!