drzo committed on
Commit
8231c96
·
verified ·
1 Parent(s): 28d0f1b

Deploy EchoSelf NanEcho model (workflow run 173)

Browse files
Files changed (4)
  1. README.md +5 -5
  2. config.json +3 -3
  3. pytorch_model.bin +2 -2
  4. training_metadata.json +55 -1
README.md CHANGED
@@ -19,16 +19,16 @@ capabilities inspired by cognitive science and AGI research.
19
  ## Model Architecture
20
 
21
  - **Base Architecture**: GPT-2
22
- - **Parameters**: 12 layers, 768 embedding dimensions
23
  - **Vocabulary Size**: 50257
24
  - **Context Length**: N/A tokens
25
 
26
  ## Training Details
27
 
28
- - **Checkpoint ID**: unknown
29
- - **Training Iteration**: N/A
30
- - **Validation Loss**: N/A
31
- - **Quality Score**: N/A
32
 
33
  ## Echo Self Features
34
 
 
19
  ## Model Architecture
20
 
21
  - **Base Architecture**: GPT-2
22
+ - **Parameters**: 4 layers, 256 embedding dimensions
23
  - **Vocabulary Size**: 50257
24
  - **Context Length**: N/A tokens
25
 
26
  ## Training Details
27
 
28
+ - **Checkpoint ID**: ckpt_20260327_100303_1000_22deff1b_9470fbb7
29
+ - **Training Iteration**: 1000
30
+ - **Validation Loss**: 3.109394931793213
31
+ - **Quality Score**: 153600.45098766152
32
 
33
  ## Echo Self Features
34
 
config.json CHANGED
@@ -4,9 +4,9 @@
4
  "GPT2LMHeadModel"
5
  ],
6
  "vocab_size": 50257,
7
- "n_embd": 768,
8
- "n_head": 12,
9
- "n_layer": 12,
10
  "n_positions": 1024,
11
  "embd_pdrop": 0.1,
12
  "attn_pdrop": 0.1,
 
4
  "GPT2LMHeadModel"
5
  ],
6
  "vocab_size": 50257,
7
+ "n_embd": 256,
8
+ "n_head": 4,
9
+ "n_layer": 4,
10
  "n_positions": 1024,
11
  "embd_pdrop": 0.1,
12
  "attn_pdrop": 0.1,
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd7e0ada6dfb700d5bec74f5f5dab751c4b3a1982517ac374735d3b15512b866
3
- size 1297
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f65fd8c0eba030607161cb3e61bf20ad603456a0c2321d2fd89038c1e185c2e5
3
+ size 51464851
training_metadata.json CHANGED
@@ -1 +1,55 @@
1
- {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "checkpoint_id": "ckpt_20260327_100303_1000_22deff1b_9470fbb7",
3
+ "created_at": "2026-03-27T10:03:03.231247",
4
+ "iteration": 1000,
5
+ "epoch": 0,
6
+ "train_loss": 0.0,
7
+ "val_loss": 3.109394931793213,
8
+ "learning_rate": 2.7972e-05,
9
+ "model_config": {
10
+ "n_layer": 4,
11
+ "n_head": 4,
12
+ "n_embd": 256,
13
+ "vocab_size": 50257,
14
+ "block_size": 1024,
15
+ "dropout": 0.1,
16
+ "bias": true,
17
+ "initial_connections": 0.1,
18
+ "connection_growth_rate": 0.05,
19
+ "max_connections": 1.0
20
+ },
21
+ "training_config": {
22
+ "learning_rate": 0.0002,
23
+ "batch_size": 2,
24
+ "max_iters": 500,
25
+ "warmup_iters": 5000,
26
+ "lr_decay_iters": 50000,
27
+ "min_lr": 1e-05,
28
+ "weight_decay": 0.1,
29
+ "grad_clip": 1.0,
30
+ "enable_curriculum_learning": true,
31
+ "enable_introspection": true
32
+ },
33
+ "data_config": {
34
+ "data_dir": "data/nanecho",
35
+ "batch_size": 2,
36
+ "block_size": 1024
37
+ },
38
+ "metrics": {
39
+ "val_loss": 3.109394931793213,
40
+ "connection_ratio": 0.5499999999999999,
41
+ "tokens_processed": 2048000,
42
+ "training_speed_iters_per_sec": 0.08255550023854462
43
+ },
44
+ "tags": [
45
+ "phase_adaptive_mastery",
46
+ "medium_quality",
47
+ "nanecho",
48
+ "curriculum",
49
+ "introspection"
50
+ ],
51
+ "parent_checkpoint": null,
52
+ "notes": "Training checkpoint at iteration 1000 (resumed from iteration 500) | Phase: adaptive_mastery",
53
+ "file_size_mb": 249.29106044769287,
54
+ "quality_score": 153600.45098766152
55
+ }