canrager
/

temporalSAEs

canrager committed on Oct 27, 2025

Commit

10e8c61

verified ·

1 Parent(s): cc40ade

Upload llama-3.1-8B/layer_26/temporal/conf.yaml with huggingface_hub

Files changed (1) hide show

llama-3.1-8B/layer_26/temporal/conf.yaml ADDED Viewed

+data:  # data section: batching, activation cache location, source dataset, step budget
+  batch_size: 100  # unit (sequences vs. tokens) is not visible here — TODO confirm
+  cache_dir: ../../activations/precomputed_activations/precomputed_activations_llama3/  # relative path; the base directory depends on the loader — TODO confirm
+  context_length: 500  # presumably tokens per sequence — TODO confirm
+  dtype: bfloat16  # presumably the dtype of the cached activations — TODO confirm
+  epochs: 1
+  hf_name: monology/pile-uncopyrighted  # presumably a Hugging Face dataset id — TODO confirm
+  num_total_steps: 200000
+  num_workers: 2
+deploy: true  # NOTE(review): what "deploy" toggles is not visible in this file
+device_id: cuda:0  # plain scalar, parsed as the string "cuda:0" (the ":" has no following space)
+eval:
+  save_tables: false
+llm:  # identifies the language model this config is associated with
+  dimin: 4096  # presumably the LLM activation width fed to the SAE — TODO confirm against the model config
+  model_hf_name: meta-llama/Llama-3.1-8B
+  tokenizer_hf_name: meta-llama/Llama-3.1-8B  # same id as model_hf_name
+log:
+  log_interval: 10  # presumably in training steps — TODO confirm
+  save_interval: 1000  # presumably in training steps — TODO confirm
+  save_multiple: false
+  wandb_project_name: TemporalSAE_test  # NOTE(review): "_test" project name alongside deploy: true — confirm this is intended
+optimizer:
+  beta1: 0.9
+  beta2: 0.95
+  decay_lr: true
+  grad_clip: 1.0
+  learning_rate: 0.001
+  min_lr: 0.0009  # equals 0.9 x learning_rate; presumably the floor used when decay_lr is true — TODO confirm
+  warmup_iters: 200  # presumably counted in the same steps as data.num_total_steps — TODO confirm
+  weight_decay: 0.0001
+sae:  # sparse-autoencoder hyperparameters (sae_type: temporal, sae_diff_type: topk)
+  block_id: 1
+  bottleneck_factor: 1
+  exp_factor: 4  # presumably dictionary width = exp_factor x llm.dimin (4 x 4096) — TODO confirm
+  gamma_reg: 10
+  kval_topk: 256  # presumably k for the top-k selection named by sae_diff_type — TODO confirm
+  n_attn_layers: 1
+  n_heads: 4
+  sae_diff_type: topk
+  sae_type: temporal
+  scaling_factor: 0.029  # NOTE(review): magic constant; its derivation is not visible here
+  tied_weights: true
+seed: 42
+tag: scratch