deepcrayon
/

LibreHPS-4B-v1.1

Image Classification

image-text-to-text

video-text-to-text

Model card Files Files and versions

Jeff Moe committed 4 days ago

Commit

6748efb

·

1 Parent(s): ba42d51

Training configuration

Files changed (1) hide show

training_config.yaml +58 -0

training_config.yaml ADDED Viewed

	@@ -0,0 +1,58 @@

+# LibreHPS-4B training config — frozen for the v1.1 weights release.
+# This is an archive of the harness config used to produce the
+# released weights and is provided for reproducibility. It is NOT the
+# config you would use for new training runs (paths point inside the
+# original training tree).
+model:  # base checkpoint and reward-token settings
+  name_or_path: "Qwen/Qwen3.5-4B-Base"  # presumably a Hugging Face Hub repo id — confirm against the loader
+  is_moe: false  # not a mixture-of-experts model; consistent with lambda_router: 0.0 under training
+  reward_token: "<|Reward|>"  # NOTE(review): looks like a special token used to read out the reward — confirm
+  dropout: 0.05  # NOTE(review): which layers this applies to is not visible here — confirm
+data:  # dataset index and per-sample limits
+  index_parquet: "librehps/dataset/index.parquet"  # relative path; per the file header, paths point inside the original training tree
+  num_video_frames: 6  # presumably frames sampled per video — TODO confirm
+  modalities: null   # null = all of: image | video | scalar_image | scalar_video
+  max_length: 2048  # NOTE(review): likely a per-sample token cap — confirm units
+optimizer:  # one learning rate per parameter group; all four are set to the same value (2.0e-6)
+  visual_lr: 2.0e-6
+  merger_lr: 2.0e-6
+  rm_head_lr: 2.0e-6
+  special_token_lr: 2.0e-6
+  weight_decay: 0.0  # weight decay disabled
+  betas: [0.9, 0.95]  # presumably Adam-style (beta1, beta2) — confirm optimizer class
+scheduler:  # learning-rate schedule
+  kind: "constant_with_warmup"  # NOTE(review): same name as the Transformers scheduler type — confirm that is what the harness resolves
+  warmup_ratio: 0.05  # presumably the fraction of total steps spent in warmup — confirm
+training:  # batch geometry, loss weights, and run length
+  global_batch_size: 384  # 12 x 4 = 48 samples per rank per optimizer step, so 384 implies 8 data-parallel ranks — TODO confirm
+  per_gpu_micro_batch: 12
+  gradient_accumulation: 4
+  epochs: 1
+  lambda_reg: 0.1  # NOTE(review): presumably the weight of a regularisation loss term — confirm
+  lambda_router: 0.0    # dense — router loss inactive
+  seed: 42
+  # Cap optimiser steps for bounded runs. null = run to ``epochs``
+  # completion. Overridable via ``--smoke-steps N`` CLI or
+  # ``LIBREHPS_SMOKE_STEPS=N`` env var.
+  max_train_steps: null
+precision:
+  kind: "bf16"  # presumably bfloat16 training precision — confirm
+deepspeed:
+  config_path: "librehps/config/ds_config/zero3.json"  # file name suggests ZeRO stage 3 — verify in the JSON itself
+fa4:  # NOTE(review): presumably FlashAttention kernel-cache settings — confirm
+  cache_enabled: true
+  cache_dir: "/home/train/.cache/flash_attn_cute_dsl"  # absolute, machine-specific path from the original training host
+logging:
+  every_steps: 10  # presumably the logging interval in steps — confirm
+checkpoint:
+  dir: "checkpoints/LibreHPS_4B"  # relative output directory
+  every_steps: 500  # presumably the checkpoint-save interval in steps — confirm