moudook committed on
Commit
813b8f2
·
verified ·
1 Parent(s): 4045966

config step=22500

Browse files
Files changed (1) hide show
  1. config.json +9 -14
config.json CHANGED
@@ -1,16 +1,11 @@
1
  {
2
  "checkpoint_dir": "/workspace/adaptive_model/checkpoints",
3
  "tokenizer_dir": "/workspace/adaptive_model/tokenizer",
4
- "run_name": "drlm_v1_diffusion",
5
  "hf_repo_id": "moudook/adaptive-model",
6
  "hf_push_every_n": 3,
7
  "dataset_names": [
8
- "open-thoughts/OpenThoughts-114k",
9
- "mlabonne/FineTome-100k",
10
- "agentlans/TeichAI-thinking-reasoning-x",
11
- "m-a-p/TerminalTraj",
12
- "nvidia/HelpSteer2",
13
- "glaiveai/glaive-function-calling-v2"
14
  ],
15
  "local_paths": {},
16
  "vocab_size": 32000,
@@ -21,21 +16,21 @@
21
  "intermediate_dim": 5504,
22
  "max_seq_len": 2048,
23
  "dtype": "bfloat16",
24
- "learning_rate": 0.0003,
25
  "weight_decay": 0.1,
26
  "beta1": 0.9,
27
  "beta2": 0.95,
28
  "grad_clip": 1.0,
29
  "warmup_steps": 200,
30
- "total_steps": 50000,
31
- "batch_size": 32,
32
- "grad_accum": 4,
33
- "save_every": 250,
34
  "log_every": 10,
35
- "keep_last_n": 5,
36
  "use_wandb": false,
37
  "wandb_project": "adaptive-model",
38
  "device": "cuda",
39
  "seed": 42,
40
- "prefetch_batches": 4
41
  }
 
1
  {
2
  "checkpoint_dir": "/workspace/adaptive_model/checkpoints",
3
  "tokenizer_dir": "/workspace/adaptive_model/tokenizer",
4
+ "run_name": "phase2_deepseek_exclusive",
5
  "hf_repo_id": "moudook/adaptive-model",
6
  "hf_push_every_n": 3,
7
  "dataset_names": [
8
+ "a-m-team/AM-DeepSeek-R1-Distilled-1.4M"
 
 
 
 
 
9
  ],
10
  "local_paths": {},
11
  "vocab_size": 32000,
 
16
  "intermediate_dim": 5504,
17
  "max_seq_len": 2048,
18
  "dtype": "bfloat16",
19
+ "learning_rate": 0.0001,
20
  "weight_decay": 0.1,
21
  "beta1": 0.9,
22
  "beta2": 0.95,
23
  "grad_clip": 1.0,
24
  "warmup_steps": 200,
25
+ "total_steps": 100000,
26
+ "batch_size": 64,
27
+ "grad_accum": 2,
28
+ "save_every": 500,
29
  "log_every": 10,
30
+ "keep_last_n": 3,
31
  "use_wandb": false,
32
  "wandb_project": "adaptive-model",
33
  "device": "cuda",
34
  "seed": 42,
35
+ "prefetch_batches": 8
36
  }