# LUNA-Training / rag_mcp_lora_config.yaml
# Provenance: uploaded to the Hub (ASTERIZER/LUNA-100M) with huggingface_hub,
# commit a783c2e (verified).
---
# When true, the trainer presumably derives unset options from the settings
# below — NOTE(review): confirm against the training script.
auto_config: true

# Hugging Face Hub source for the pretrained weights.
hf_model_repo: "ASTERIZER/LUNA-100M"
hf_model_file: "sft_v1/final/model.pth"

# Local paths (relative to the repository root).
pretrained_ckpt: "Base/out/input_models/luna_sft_v1/model.pth"
train_json: "Base/Datasets/rag_mcp_sft/train.json"
val_json: "Base/Datasets/rag_mcp_sft/val.json"
out_dir: "Base/out/sft/rag_mcp_lora"
tokenizer_dir: "Base/checkpoints/EleutherAI/pythia-160m"

# Model architecture — must match the pretrained checkpoint being loaded.
model:
  vocab_size: 50304
  seq_len: 1024
  n_layer: 10
  n_embd: 768
  n_head: 12

# Training schedule. Interval values are presumably in optimizer steps —
# NOTE(review): confirm units against the training loop.
train:
  epochs: 2
  lr_warmup_steps: 100
  save_interval: 250
  log_interval: 10
  eval_interval: 250
  max_norm: 1.0  # gradient-clipping threshold

# AdamW-style optimizer hyperparameters.
optimizer:
  lr: 2.0e-4
  min_lr: 2.0e-5
  weight_decay: 0.0
  betas: [0.9, 0.95]
  eps: 1.0e-8

# Batching: global_batch (64) = micro_batch (8) * grad_accum (8).
batch:
  global_batch: 64
  micro_batch: 8
  grad_accum: 8
  auto_probe_batch: true
  probe_safety: 0.94  # presumably a safety fraction applied to the probed batch size — verify

dataloader:
  num_workers: 4
  pin_memory: true

hardware:
  precision: "bf16"

# LoRA adapter configuration.
lora:
  rank: 16
  alpha: 32
  dropout: 0.05
  # Module name patterns the LoRA adapters attach to.
  target_modules:
    - "attn.c_attn"
    - "attn.c_proj"
    - "mlp.fc"
    - "mlp.proj"

# Prompts for qualitative generation checks during training.
eval_prompts:
  - "Explain what retrieval-augmented generation is and why teams use it."
  - "Describe the core parts of MCP for an engineer who has never used it."
  - "Compare RAG and MCP without confusing their roles."
  - "What are good practices for grounding answers with retrieved context?"
  - "How can an MCP server expose retrieval capabilities to an AI host?"