ASTERIZER
/

LUNA-Training

ASTERIZER committed on 18 days ago

Commit

caa2a1b

verified ·

1 Parent(s): 0122e75

Upload rag_mcp_full_sft_config.yaml with huggingface_hub

Files changed (1) hide show

rag_mcp_full_sft_config.yaml ADDED Viewed

+auto_config: true
+hf_model_repo: "ASTERIZER/LUNA-100M"
+hf_model_file: "sft_v1/final/model.pth"
+hf_dataset_repo: "ASTERIZER/LUNA-RAG-MCP-SFT-10M"
+pretrained_ckpt: "Base/out/input_models/luna_sft_v1/sft_v1/final/model.pth"
+train_json: "Base/Datasets/rag_mcp_sft/train.json"
+val_json: "Base/Datasets/rag_mcp_sft/val.json"
+out_dir: "Base/out/sft/rag_mcp_full_sft"
+tokenizer_dir: "Base/checkpoints/EleutherAI/pythia-160m"
+model:
+  vocab_size: 50304
+  seq_len: 1024
+  n_layer: 10
+  n_embd: 768
+  n_head: 12
+train:
+  epochs: 2
+  max_tokens: 0
+  lr_warmup_steps: 100
+  save_interval: 250
+  log_interval: 10
+  eval_interval: 250
+  max_norm: 1.0
+optimizer:
+  lr: 8.0e-6
+  min_lr: 8.0e-7
+  weight_decay: 0.01
+  betas: [0.9, 0.95]
+  eps: 1.0e-8
+batch:
+  global_batch: 48
+  micro_batch: 4
+  grad_accum: 12
+dataloader:
+  num_workers: 4
+  pin_memory: true
+hardware:
+  precision: "bf16"
+  compile: false
+eval_prompts:
+  - "Explain retrieval-augmented generation in practical engineering terms."
+  - "What problem does MCP solve for AI applications?"
+  - "Compare RAG and MCP clearly without mixing them together."
+  - "How should a model use retrieved context without overclaiming?"
+  - "Describe how an MCP server can expose tools or retrieval to a host model."