ASTERIZER committed on
Commit
caa2a1b
·
verified ·
1 Parent(s): 0122e75

Upload rag_mcp_full_sft_config.yaml with huggingface_hub

Browse files
Files changed (1) hide show
  1. rag_mcp_full_sft_config.yaml +54 -0
rag_mcp_full_sft_config.yaml ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
auto_config: true

hf_model_repo: "ASTERIZER/LUNA-100M"
hf_model_file: "sft_v1/final/model.pth"
hf_dataset_repo: "ASTERIZER/LUNA-RAG-MCP-SFT-10M"
pretrained_ckpt: "Base/out/input_models/luna_sft_v1/sft_v1/final/model.pth"

train_json: "Base/Datasets/rag_mcp_sft/train.json"
val_json: "Base/Datasets/rag_mcp_sft/val.json"
out_dir: "Base/out/sft/rag_mcp_full_sft"
tokenizer_dir: "Base/checkpoints/EleutherAI/pythia-160m"

model:
  vocab_size: 50304
  seq_len: 1024
  n_layer: 10
  n_embd: 768
  n_head: 12

train:
  epochs: 2
  max_tokens: 0
  lr_warmup_steps: 100
  save_interval: 250
  log_interval: 10
  eval_interval: 250
  max_norm: 1.0

optimizer:
  lr: 8.0e-6
  min_lr: 8.0e-7
  weight_decay: 0.01
  betas: [0.9, 0.95]
  eps: 1.0e-8

batch:
  global_batch: 48
  micro_batch: 4
  grad_accum: 12

dataloader:
  num_workers: 4
  pin_memory: true

hardware:
  precision: "bf16"
  compile: false

eval_prompts:
  - "Explain retrieval-augmented generation in practical engineering terms."
  - "What problem does MCP solve for AI applications?"
  - "Compare RAG and MCP clearly without mixing them together."
  - "How should a model use retrieved context without overclaiming?"
  - "Describe how an MCP server can expose tools or retrieval to a host model."