# Training configuration: model definition (nn), optimisation/trainer settings
# (train), seeding (seed_args) and global options (global_args).
#
# NOTE(review): this file was restored to block style from a copy in which all
# indentation had been collapsed. Key order and every scalar value are kept
# as-is; parent/child nesting was inferred from key order and naming and should
# be confirmed against the code that loads this config.

nn:
  model: GeometricTransformer
  dataset: GeometricTransformerERADatasetPretrain
  dtype: float32
  device: cuda
  data_in_memory: false
  load_model: /data2/scratch/group_scratch/era/protein_era/models/esm3clm/esm3_clm.pt
  batch_size: 64
  num_workers: 4
  collate_fn: geometric_transformer_era_pretrain_collate_fn
  model_args:
    dim_model: 1536
    unified_transformer_args:
      n_layers: 48
      geom_layer_indices:
        - 0
      # Lists all 48 layer indices (0..47), matching n_layers above.
      mha_layer_indices:
        - 0
        - 1
        - 2
        - 3
        - 4
        - 5
        - 6
        - 7
        - 8
        - 9
        - 10
        - 11
        - 12
        - 13
        - 14
        - 15
        - 16
        - 17
        - 18
        - 19
        - 20
        - 21
        - 22
        - 23
        - 24
        - 25
        - 26
        - 27
        - 28
        - 29
        - 30
        - 31
        - 32
        - 33
        - 34
        - 35
        - 36
        - 37
        - 38
        - 39
        - 40
        - 41
        - 42
        - 43
        - 44
        - 45
        - 46
        - 47
      bias: false
      mha_args:
        num_heads: 24
        bias: false
        qk_layernorm: true
      gha_args:
        num_heads: 256
        num_vector_messages: 1
        mask_and_zero_frameless: true
        bias: false
      # NOTE(review): the five keys below are placed at the transformer level
      # rather than inside gha_args -- confirm against the model constructor.
      # scaling_factor is numerically sqrt(48 / 36); presumably derived from
      # n_layers -- TODO confirm before changing n_layers.
      scaling_factor: 1.1547005383792515
      ffn_type: swiglu
      norm_type: layer_norm
      expansion_ratio: 2.66666666667
      ida_layer_indices: []
    # NOTE(review): the *_token_info maps are assumed to belong to model_args;
    # verify against the loader.
    # NOTE(review): total (5001) is well above the highest id listed here
    # (pad: 4099) -- confirm this is intended.
    struc_token_info:
      mask: 4096
      eos: 4097
      bos: 4098
      pad: 4099
      total: 5001
      max_non_special_token: 4095
    residue_token_info:
      mask: 32
      eos: 2
      bos: 0
      pad: 1
      total: 33
      max_non_special_token: null
    # NOTE(review): the three maps below have every special id set to 0 and
    # total set to null -- presumably these tracks are unused; confirm.
    sasa_token_info:
      mask: 0
      eos: 0
      bos: 0
      pad: 0
      total: null
      max_non_special_token: null
    sec_struct_token_info:
      mask: 0
      eos: 0
      bos: 0
      pad: 0
      total: null
      max_non_special_token: null
    res_annot_token_info:
      mask: 0
      eos: 0
      bos: 0
      pad: 0
      total: null
      max_non_special_token: null
  # All data goes to the training split; val and test are empty.
  dataset_split_args:
    train: 1.0
    val: 0.0
    test: 0.0

train:
  lightning_model: BidirectionalModel
  resume_training_path: lightning_logs/version_7673/checkpoints/step_step=100000.ckpt
  lightning_model_args:
    eval_type: era
    beta: -10.0
    gamma: 0
    sampling_temperature: 1.0
    optimizer: AdamW
    optimizer_args:
      lr: 1.0e-06
      betas:
        - 0.9
        - 0.99
      weight_decay: 0.01
    lr_scheduler: null
    interval: step
    monitor: train/ERALoss
    sync_dist: true
    on_step: true
    lr_scheduler_args: null
  trainer_args:
    accelerator: cuda
    devices: 4
    # Quoted so the value is unambiguously a string.
    precision: '16-mixed'
    log_every_n_steps: 50
    max_epochs: 1
    enable_progress_bar: false
    gradient_clip_val: 1.0
    strategy: DDPStrategy
  # NOTE(review): strategy_args, the two checkpoint sections and logger are
  # placed as siblings of trainer_args under train -- confirm with the loader.
  strategy_args:
    find_unused_parameters: true
  every_epoch_checkpoint_args:
    # Quoted because the value contains '{', '}' and ':'; string is unchanged.
    filename: 'step_{step:02d}'
    every_n_epochs: null
    every_n_train_steps: 50000
    save_top_k: -1
  best_checkpoint_args:
    filename: best_model
    # The monitored metric is a training metric (val split is 0.0 above).
    monitor: train/ERALoss
    mode: min
    save_top_k: 1
  logger:
    loggertype: TensorBoard
    logger_args:
      version: null

seed_args:
  seed: 42
  workers: true

global_args:
  dataset_filename: alignment_dataset.h5
  keys_to_test:
    - nn.model
    - nn.model_args