hrm-mini / model_config.json
hexmage
Upload folder using huggingface_hub
4cd2f3f verified
Invalid JSON: Unexpected token 'a', "arch: H_"... is not valid JSON
arch:
  H_cycles: 2
  L_cycles: 6
  bptt: true
  forward_dtype: bfloat16
  head_dim: 64
  hidden_size: 512
  intermediate_size: 2048
  name: hrm@HRM
  norm_eps: 1.0e-06
  num_layers: 2
  rope_theta: 10000.0
beta1: 0.9
beta2: 0.95
cycles_per_data: 16
data:
  augment: true
  dataset_name: /sg-pretrain/datasets/sudoku-extreme-1k
  name: sudoku
  repeat: 200
ema: 0.999
epochs: 20
local_batch_size: 96
log_interval: 5
lr: 0.0001
lr_min_ratio: 1.0
lr_warmup_steps: 2000
weight_decay: 1.0