# LibreHPS-4B-v1.1 / training_config.yaml
# Jeff Moe
# Training configuration
# 6748efb
# LibreHPS-4B training config — frozen for the v1.1 weights release.
# This is an archive of the harness config used to produce the
# released weights and is provided for reproducibility. It is NOT the
# config you would use for new training runs (paths point inside the
# original training tree).
# Base checkpoint and reward-token settings. The child keys must be
# indented under `model:`; at column 0 they parse as unrelated top-level
# keys and `model` itself becomes null.
model:
  name_or_path: "Qwen/Qwen3.5-4B-Base"
  is_moe: false  # dense model; matches lambda_router: 0.0 under training
  # NOTE(review): presumably the special token whose position feeds the
  # reward head -- confirm against the harness.
  reward_token: "<|Reward|>"
  dropout: 0.05
# Dataset index and input settings. Children are nested under `data:` so
# the section is a mapping rather than a null followed by stray keys.
data:
  # Relative path; per the file header, paths point inside the original
  # training tree.
  index_parquet: "librehps/dataset/index.parquet"
  num_video_frames: 6
  modalities: null  # all of image | video | scalar_image | scalar_video
  # NOTE(review): unit is presumably tokens -- confirm against the loader.
  max_length: 2048
# Optimiser settings: one learning-rate key per model component (judging
# by the key names), all set to the same value for this release.
# Keep the `2.0e-6` spelling (decimal point and signed exponent): YAML 1.1
# loaders such as PyYAML read a bare `2e-6` as a string, not a float.
optimizer:
  visual_lr: 2.0e-6
  merger_lr: 2.0e-6
  rm_head_lr: 2.0e-6
  special_token_lr: 2.0e-6
  weight_decay: 0.0
  betas: [0.9, 0.95]
# Learning-rate schedule. `kind` must stay nested here: left at column 0
# it collides with the `kind` key of the precision section, and a
# last-wins loader would silently drop this value.
scheduler:
  kind: "constant_with_warmup"
  # NOTE(review): presumably a fraction of total optimiser steps -- confirm.
  warmup_ratio: 0.05
# Batch geometry, loss weights, and run length. Children are nested under
# `training:` so the section is a mapping, not a null with stray keys.
training:
  # 384 = 12 (micro-batch) x 4 (accumulation) x 8.
  # NOTE(review): the factor of 8 (data-parallel ranks) is inferred from
  # the arithmetic, not stated anywhere in this file -- confirm.
  global_batch_size: 384
  per_gpu_micro_batch: 12
  gradient_accumulation: 4
  epochs: 1
  lambda_reg: 0.1
  lambda_router: 0.0  # dense — router loss inactive
  seed: 42
  # Cap optimiser steps for bounded runs. null = run to ``epochs``
  # completion. Overridable via ``--smoke-steps N`` CLI or
  # ``LIBREHPS_SMOKE_STEPS=N`` env var.
  max_train_steps: null
# Numeric precision for training. `kind` must stay nested here: left at
# column 0 it collides with the `kind` key of the scheduler section, and
# a last-wins loader would keep only one of the two values.
precision:
  kind: "bf16"
# DeepSpeed settings live in a separate JSON file; only its path is
# recorded here. The path is relative -- per the file header, paths point
# inside the original training tree.
deepspeed:
  config_path: "librehps/config/ds_config/zero3.json"
# Kernel-cache settings for the `fa4` component.
# NOTE(review): `fa4` presumably refers to FlashAttention 4, going by the
# cache directory name -- confirm.
fa4:
  cache_enabled: true
  # Absolute path on the original training host; kept verbatim because
  # this file is an archive. Adjust before reuse on another machine.
  cache_dir: "/home/train/.cache/flash_attn_cute_dsl"
# Logging cadence. `every_steps` must stay nested here: left at column 0
# it collides with the `every_steps` key of the checkpoint section, and a
# last-wins loader would silently replace 10 with 500.
logging:
  every_steps: 10
# Checkpoint output location and cadence. `every_steps` must stay nested
# here so it does not collide with the `every_steps` key of the logging
# section.
checkpoint:
  dir: "checkpoints/LibreHPS_4B"  # relative path
  every_steps: 500