Deploy EchoSelf NanEcho model (workflow run 173)

Files changed (4)

README.md CHANGED

@@ -19,16 +19,16 @@ capabilities inspired by cognitive science and AGI research.
 ## Model Architecture
 - **Base Architecture**: GPT-2
-- **Parameters**: 12 layers, 768 embedding dimensions
 - **Vocabulary Size**: 50257
 - **Context Length**: N/A tokens
 ## Training Details
-- **Checkpoint ID**: unknown
-- **Training Iteration**: N/A
-- **Validation Loss**: N/A
-- **Quality Score**: N/A
 ## Echo Self Features

 ## Model Architecture
 - **Base Architecture**: GPT-2
+- **Parameters**: 4 layers, 256 embedding dimensions
 - **Vocabulary Size**: 50257
 - **Context Length**: N/A tokens
 ## Training Details
+- **Checkpoint ID**: ckpt_20260327_100303_1000_22deff1b_9470fbb7
+- **Training Iteration**: 1000
+- **Validation Loss**: 3.109394931793213
+- **Quality Score**: 153600.45098766152
 ## Echo Self Features

config.json CHANGED

@@ -4,9 +4,9 @@
     "GPT2LMHeadModel"
   ],
   "vocab_size": 50257,
-  "n_embd": 768,
-  "n_head": 12,
-  "n_layer": 12,
   "n_positions": 1024,
   "embd_pdrop": 0.1,
   "attn_pdrop": 0.1,

     "GPT2LMHeadModel"
   ],
   "vocab_size": 50257,
+  "n_embd": 256,
+  "n_head": 4,
+  "n_layer": 4,
   "n_positions": 1024,
   "embd_pdrop": 0.1,
   "attn_pdrop": 0.1,

pytorch_model.bin CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fd7e0ada6dfb700d5bec74f5f5dab751c4b3a1982517ac374735d3b15512b866
-size 1297

 version https://git-lfs.github.com/spec/v1
+oid sha256:f65fd8c0eba030607161cb3e61bf20ad603456a0c2321d2fd89038c1e185c2e5
+size 51464851

training_metadata.json CHANGED

+{
+  "checkpoint_id": "ckpt_20260327_100303_1000_22deff1b_9470fbb7",
+  "created_at": "2026-03-27T10:03:03.231247",
+  "iteration": 1000,
+  "epoch": 0,
+  "train_loss": 0.0,
+  "val_loss": 3.109394931793213,
+  "learning_rate": 2.7972e-05,
+  "model_config": {
+    "n_layer": 4,
+    "n_head": 4,
+    "n_embd": 256,
+    "vocab_size": 50257,
+    "block_size": 1024,
+    "dropout": 0.1,
+    "bias": true,
+    "initial_connections": 0.1,
+    "connection_growth_rate": 0.05,
+    "max_connections": 1.0
+  },
+  "training_config": {
+    "learning_rate": 0.0002,
+    "batch_size": 2,
+    "max_iters": 500,
+    "warmup_iters": 5000,
+    "lr_decay_iters": 50000,
+    "min_lr": 1e-05,
+    "weight_decay": 0.1,
+    "grad_clip": 1.0,
+    "enable_curriculum_learning": true,
+    "enable_introspection": true
+  },
+  "data_config": {
+    "data_dir": "data/nanecho",
+    "batch_size": 2,
+    "block_size": 1024
+  },
+  "metrics": {
+    "val_loss": 3.109394931793213,
+    "connection_ratio": 0.5499999999999999,
+    "tokens_processed": 2048000,
+    "training_speed_iters_per_sec": 0.08255550023854462
+  },
+  "tags": [
+    "phase_adaptive_mastery",
+    "medium_quality",
+    "nanecho",
+    "curriculum",
+    "introspection"
+  ],
+  "parent_checkpoint": null,
+  "notes": "Training checkpoint at iteration 1000 (resumed from iteration 500) | Phase: adaptive_mastery",
+  "file_size_mb": 249.29106044769287,
+  "quality_score": 153600.45098766152
+}