# LibreHPS-4B training config — frozen for the v1.1 weights release.
# This is an archive of the harness config used to produce the
# released weights and is provided for reproducibility. It is NOT the
# config you would use for new training runs (paths point inside the
# original training tree).
# Model section: which base checkpoint is loaded and how the reward model
# is configured on top of it.
model:
  # Identifier of the base checkpoint. NOTE(review): looks like a
  # Hugging Face Hub repo id rather than a local path — confirm.
  name_or_path: "Qwen/Qwen3.5-4B-Base"
  # false = dense model; training.lambda_router is 0.0 accordingly.
  is_moe: false
  # Literal special-token string. Presumably the token whose position the
  # reward head reads — verify against the harness.
  reward_token: "<|Reward|>"
  # Dropout probability; where it is applied is decided by the harness.
  dropout: 0.05

# Data section: dataset index location and per-sample limits.
data:
  # Relative path to the dataset index; resolves only inside the original
  # training tree (see the header note about archived paths).
  index_parquet: "librehps/dataset/index.parquet"
  # NOTE(review): presumably the number of frames sampled per video
  # sample — confirm against the data loader.
  num_video_frames: 6
  modalities: null   # all of image | video | scalar_image | scalar_video
  # NOTE(review): presumably the maximum sequence length in tokens —
  # confirm whether longer samples are truncated or dropped.
  max_length: 2048

# Optimizer section: one learning rate per parameter group, all set to the
# same value (2.0e-6) in this release.
# NOTE(review): no learning rate is listed here for the language-model
# backbone — presumably it is frozen or configured elsewhere; confirm.
optimizer:
  visual_lr: 2.0e-6
  merger_lr: 2.0e-6
  rm_head_lr: 2.0e-6
  special_token_lr: 2.0e-6
  # 0.0 disables weight decay.
  weight_decay: 0.0
  # NOTE(review): presumably Adam-style (beta1, beta2) — confirm which
  # optimizer class the harness instantiates.
  betas: [0.9, 0.95]

# Learning-rate schedule.
scheduler:
  # Schedule name; the value suggests a warmup phase followed by a
  # constant learning rate.
  kind: "constant_with_warmup"
  # NOTE(review): presumably the fraction of total optimizer steps spent
  # in warmup (0.05 = 5%) — confirm how the harness computes total steps.
  warmup_ratio: 0.05

# Training-loop section: batch geometry, loss weights, seed and step cap.
training:
  # Batch geometry: 12 (micro batch) x 4 (accumulation) = 48 samples per
  # GPU per optimizer step. NOTE(review): if the harness enforces
  # global = micro x accumulation x world size, 384 implies 8 GPUs —
  # confirm before reusing on different hardware.
  global_batch_size: 384
  per_gpu_micro_batch: 12
  gradient_accumulation: 4
  epochs: 1
  # NOTE(review): presumably the weight of a regularization term in the
  # loss — confirm against the loss implementation.
  lambda_reg: 0.1
  lambda_router: 0.0    # dense — router loss inactive
  seed: 42
  # Cap optimizer steps for bounded runs. null = run to ``epochs``
  # completion. Overridable via the ``--smoke-steps N`` CLI flag or the
  # ``LIBREHPS_SMOKE_STEPS=N`` env var.
  max_train_steps: null

# Numeric precision used for the run (bfloat16).
precision:
  kind: "bf16"

# DeepSpeed section: points at a separate JSON config file.
deepspeed:
  # Relative path inside the original training tree. The file name
  # suggests ZeRO stage 3 — confirm from the JSON itself.
  config_path: "librehps/config/ds_config/zero3.json"

# fa4 section: on-disk cache settings. NOTE(review): "fa4" presumably
# refers to FlashAttention 4 and the cache to its compiled kernels —
# confirm.
fa4:
  cache_enabled: true
  # Absolute path specific to the original training machine (user
  # "train"); it must be changed to reuse this config elsewhere.
  cache_dir: "/home/train/.cache/flash_attn_cute_dsl"

# Logging cadence, in steps.
logging:
  every_steps: 10

# Checkpoint section: output location and save cadence.
checkpoint:
  # Relative output directory. NOTE(review): presumably resolved against
  # the working directory of the training process — confirm.
  dir: "checkpoints/LibreHPS_4B"
  # A checkpoint is written every 500 steps.
  every_steps: 500