| RC_augmentation: false |
| _dataset_cfg_lookup: |
| gencode128k_basic: |
| hf_path: jzshared/gencode128k_basic |
| path: data/gencode128k_basic |
| type: refseq |
| gencode128k_debug: |
| hf_path: jzshared/gencode128k_debug |
| path: data/gencode128k_debug |
| type: refseq |
| gencode_human_12.8k: |
| hf_path: jzshared/gencode_human_12.8k |
| path: data/gencode_human_12.8k |
| type: refseq |
| gencode_human_128k: |
| hf_path: jzshared/gencode_human_128k |
| path: data/gencode_human_128k |
| type: refseq |
| hg38_128k: |
| hf_path: jzshared/hg38_cds_anchored_128000 |
| path: data/hg38_cds_anchored_128000 |
| type: refseq |
| hg38_12k: |
| hf_path: jzshared/hg38_12800 |
| path: data/hg38_cds_anchored_len12800_mincds150_1000000samples |
| type: refseq |
| hg38_cds_4m: |
| hf_path: null |
| path: data/hg38_cds_dataset_4m_filtered |
| type: refseq |
| alias: CKPT_DEBUG |
| alpha_exp: 1.0 |
| alpha_max: 0.03 |
| arch: hnet |
| batch_size: 32 |
| bp_per_token: 3 |
| cluster: mila |
| cmd: python src/scripts/train_genezip_v1.py exp=glm/stage1 data=gencode_human_12.8k |
| model=hnet/mamba_64m_2dc max_len=12800 batch_size=32 grad_acc_steps=1 max_train_steps=20 |
| eval_steps=10 save_steps=10 alpha_max=0.03 use_routing_floor=false strictness_max=0 |
| region_info=promoter1_cds1_utr1_exon1_intron1_nig1_dig1 alias=CKPT_DEBUG bp_per_token=3 |
| use_wandb=true upload_to_hf=true hf_repo=jzshared/ckpt_debug |
| config_path: null |
| data: gencode_human_12.8k |
| data_alias: ${.data}_${max_len} |
| dataset: ${_dataset_cfg_lookup[${data}]} |
| device: cuda |
| device_type: GPU |
| dirs: |
| data_cache: ${project_root}/data_cache/ |
| data_storage: ${project_root}/data/ |
| hydra: ${project_root}/temp/hydra/ |
| output: ${project_root}/output/${data_alias}/${alias}/ |
| temp: ${project_root}/temp/working_dir/${uid}/ |
| wandb_cache: ${oc.env:WANDB_CACHE_DIR,${project_root}/temp/wandb_cache/} |
| epochs: 200 |
| eval_batch_size: ${batch_size} |
| eval_steps: 10 |
| grad_acc_steps: 1 |
| hf_repo: jzshared/ckpt_debug |
| hf_repo_owner: jzshared |
| is_distributed: true |
| local_rank: 0 |
| logging: |
| level: info |
| log_wandb_metric_to_stdout: true |
| lr: 0.001 |
| master_port: '46235' |
| max_data_samples: null |
| max_eval_samples: 1000 |
| max_len: 12800 |
| max_length: ${max_len} |
| max_train_steps: 20 |
| min_routing_tokens: 8 |
| mode: Stage1 |
| model: |
| arch: hnet |
| name: hnet_mamba_64m_2dc |
| model_alias: ${oc.select:model.name,UnknownModel} |
| model_cfg: |
| arch_layout: |
| - m2 |
| - - m2 |
| - - m15 |
| - m2 |
| - m2 |
| attn_cfg: |
| num_heads: |
| - 8 |
| - 8 |
| - 12 |
| rotary_emb_dim: |
| - 16 |
| - 16 |
| - 24 |
| window_size: |
| - 511 |
| - 511 |
| - -1 |
| d_intermediate: |
| - 0 |
| - 0 |
| - 2048 |
| d_model: |
| - 512 |
| - 512 |
| - 768 |
| min_routing_tokens: ${min_routing_tokens} |
| n_gpt: 1.0 |
| r_hi: ${r_hi} |
| r_low: ${r_low} |
| r_warm_up_end: ${r_warm_up_end} |
| r_warm_up_start: ${r_warm_up_start} |
| ssm_cfg: |
| chunk_size: 256 |
| d_conv: 4 |
| d_state: 64 |
| expand: 2 |
| head_dim: 64 |
| tie_embeddings: true |
| vocab_size: 12 |
| name: hnet_base |
| private: false |
| project_root: ${hydra:runtime.cwd} |
| r_hi: 0.3 |
| r_low: 0.0 |
| r_warm_up_end: 750 |
| r_warm_up_start: 200 |
| rank: 0 |
| reference_loss: null |
| region_info: promoter1_cds1_utr1_exon1_intron1_nig1_dig1 |
| save_steps: 10 |
| seed: 0 |
| source: ${dataset.type} |
| strictness_exp: 1.0 |
| strictness_max: 0 |
| tokenizer: fast |
| training: |
| adam_beta1: 0.9 |
| adam_beta2: 0.95 |
| bf16: true |
| dataloader_drop_last: true |
| dataloader_num_workers: 1 |
| disable_tqdm: false |
| do_train: true |
| eval_steps: ${eval_steps} |
| eval_strategy: steps |
| gradient_accumulation_steps: ${grad_acc_steps} |
| gradient_checkpointing: false |
| group_by_length: false |
| label_names: |
| - input_ids |
| learning_rate: ${lr} |
| logging_steps: 10 |
| lr_scheduler_type: linear |
| max_grad_norm: 2.0 |
| max_train_steps: ${max_train_steps} |
| num_train_epochs: ${epochs} |
| output_dir: ${dirs.output} |
| overrides: {} |
| per_device_eval_batch_size: ${eval_batch_size} |
| per_device_train_batch_size: ${batch_size} |
| remove_unused_columns: false |
| report_to: null |
| save_steps: ${save_steps} |
| save_strategy: steps |
| use_lr_multiplier: true |
| warmup_steps: 500 |
| weight_decay: 0.1 |
| training_alias: ${mode}_glm_s1_${region_info}_bp${bp_per_token}_aw${warmup_steps}_amax${alpha_max}_smax${strictness_max}_lr${lr}_e${epochs}_ms${max_train_steps}_maxlen${max_len} |
| uid: 98p9y5w8 |
| upload_to_hf: true |
| use_routing_floor: false |
| use_wandb: true |
| valid_test_downsample: null |
| version: NA |
| wandb: |
| dir: ${dirs.wandb_cache} |
| entity: ${oc.select:env.vars.wandb_entity,${oc.env:WANDB_ENTITY,null}} |
| id: 98p9y5w8 |
| mode: online |
| name: CKPT_DEBUG |
| project: ${oc.select:env.vars.wandb_proj,DNAFM} |
| step_metric: null |
| tags: |
| - ${mode} |
| url: https://wandb.ai/jzshared/DNAFM/runs/98p9y5w8 |
| warmup_steps: 0 |
| world_size: 4 |
|
|