# LibreHPS-4B training config — frozen for the v1.1 weights release.
# This is an archive of the harness config used to produce the
# released weights and is provided for reproducibility. It is NOT the
# config you would use for new training runs (paths point inside the
# original training tree).
#
# NOTE(review): the block structure below was restored from a copy in
# which all line breaks had been lost; keys and values are unchanged,
# but the nesting is inferred from key order — confirm against the
# harness's config schema before relying on it.
---
# Base model and reward-head settings.
model:
  name_or_path: "Qwen/Qwen3.5-4B-Base"
  is_moe: false
  reward_token: "<|Reward|>"
  dropout: 0.05

# Dataset index and input settings.
data:
  index_parquet: "librehps/dataset/index.parquet"
  num_video_frames: 6
  modalities: null  # all of image | video | scalar_image | scalar_video
  max_length: 2048

# Per-parameter-group learning rates plus shared optimiser settings.
optimizer:
  visual_lr: 2.0e-6
  merger_lr: 2.0e-6
  rm_head_lr: 2.0e-6
  special_token_lr: 2.0e-6
  weight_decay: 0.0
  betas: [0.9, 0.95]

scheduler:
  kind: "constant_with_warmup"
  warmup_ratio: 0.05

training:
  # NOTE(review): 12 (micro batch) x 4 (accumulation) = 48 samples per
  # rank per optimiser step, so a global batch of 384 presumably
  # implies 8 data-parallel ranks — confirm against the launch setup.
  global_batch_size: 384
  per_gpu_micro_batch: 12
  gradient_accumulation: 4
  epochs: 1
  lambda_reg: 0.1
  lambda_router: 0.0  # dense — router loss inactive
  seed: 42
  # Cap optimiser steps for bounded runs. null = run to ``epochs``
  # completion. Overridable via ``--smoke-steps N`` CLI or
  # ``LIBREHPS_SMOKE_STEPS=N`` env var.
  max_train_steps: null

precision:
  kind: "bf16"

deepspeed:
  config_path: "librehps/config/ds_config/zero3.json"

fa4:
  cache_enabled: true
  # Absolute path from the original training host (see header note).
  cache_dir: "/home/train/.cache/flash_attn_cute_dsl"

logging:
  every_steps: 10

checkpoint:
  dir: "checkpoints/LibreHPS_4B"
  every_steps: 500